import cheerio from 'cheerio';
import get from 'lodash/get';
import type {
	INodeExecutionData,
	IExecuteFunctions,
	INodeType,
	INodeTypeDescription,
	IDataObject,
	INodeProperties,
} from 'n8n-workflow';
import { NodeConnectionType, NodeOperationError } from 'n8n-workflow';

import { getResolvables, sanitizeDataPathKey } from '@utils/utilities';

import { placeholder } from './placeholder';
import type { IValueData } from './types';
import { getValue } from './utils';

/**
 * Optionally converts an underscore-separated header into space-separated,
 * capitalized words (e.g. `first_name` -> `First Name`).
 *
 * @param header - The raw header text.
 * @param capitalize - When falsy, `header` is returned unchanged.
 * @returns The header, either untouched or with every non-empty `_`-separated
 * segment having its first character upper-cased and segments joined by a space.
 */
export const capitalizeHeader = (header: string, capitalize?: boolean) => {
	if (!capitalize) return header;
	return (
		header
			.split('_')
			// Drop empty segments produced by leading, trailing or doubled underscores
			// so that `word[0]` below is always defined.
			.filter((word) => word)
			.map((word) => word[0].toUpperCase() + word.slice(1))
			.join(' ')
	);
};

/**
 * Shared definition of the "Extraction Values" fixed collection. It is spread
 * into the node's properties twice below with different defaults and
 * version-dependent display options.
 */
const extractionValuesCollection: INodeProperties = {
	displayName: 'Extraction Values',
	name: 'extractionValues',
	placeholder: 'Add Value',
	type: 'fixedCollection',
	typeOptions: {
		multipleValues: true,
	},
	default: {},
	options: [
		{
			name: 'values',
			displayName: 'Values',
			values: [
				{
					displayName: 'Key',
					name: 'key',
					type: 'string',
					default: '',
					description: 'The key under which the extracted value should be saved',
				},
				{
					displayName: 'CSS Selector',
					name: 'cssSelector',
					type: 'string',
					default: '',
					placeholder: '.price',
					description: 'The CSS selector to use',
				},
				{
					displayName: 'Return Value',
					name: 'returnValue',
					type: 'options',
					options: [
						{
							name: 'Attribute',
							value: 'attribute',
							description: 'Get an attribute value like "class" from an element',
						},
						{
							name: 'HTML',
							value: 'html',
							description: 'Get the HTML the element contains',
						},
						{
							name: 'Text',
							value: 'text',
							description: 'Get only the text content of the element',
						},
						{
							name: 'Value',
							value: 'value',
							description: 'Get value of an input, select or textarea',
						},
					],
					default: 'text',
					description: 'What kind of data should be returned',
				},
				{
					displayName: 'Attribute',
					name: 'attribute',
					type: 'string',
					displayOptions: {
						show: {
							returnValue: ['attribute'],
						},
					},
					default: '',
					placeholder: 'class',
					description: 'The name of the attribute to return the value off',
				},
				{
					displayName: 'Skip Selectors',
					name: 'skipSelectors',
					type: 'string',
					displayOptions: {
						show: {
							returnValue: ['text'],
							'@version': [{ _cnd: { gt: 1.1 } }],
						},
					},
					default: '',
					placeholder: 'e.g. img, .className, #ItemId',
					description: 'Comma-separated list of selectors to skip in the text extraction',
				},
				{
					displayName: 'Return Array',
					name: 'returnArray',
					type: 'boolean',
					default: false,
					description:
						'Whether to return the values as an array so if multiple ones get found they also get returned separately. If not set all will be returned as a single string.',
				},
			],
		},
	],
};

/**
 * n8n node offering three HTML operations: rendering an HTML template,
 * extracting values from HTML via CSS selectors, and converting the input
 * items into a single HTML table.
 */
export class Html implements INodeType {
	description: INodeTypeDescription = {
		displayName: 'HTML',
		name: 'html',
		icon: { light: 'file:html.svg', dark: 'file:html.dark.svg' },
		group: ['transform'],
		version: [1, 1.1, 1.2],
		subtitle: '={{ $parameter["operation"] }}',
		description: 'Work with HTML',
		defaults: {
			name: 'HTML',
		},
		inputs: [NodeConnectionType.Main],
		outputs: [NodeConnectionType.Main],
		parameterPane: 'wide',
		properties: [
			{
				displayName: 'Operation',
				name: 'operation',
				type: 'options',
				noDataExpression: true,
				options: [
					{
						name: 'Generate HTML Template',
						value: 'generateHtmlTemplate',
						action: 'Generate HTML template',
					},
					{
						name: 'Extract HTML Content',
						value: 'extractHtmlContent',
						action: 'Extract HTML Content',
					},
					{
						name: 'Convert to HTML Table',
						value: 'convertToHtmlTable',
						action: 'Convert to HTML Table',
					},
				],
				default: 'generateHtmlTemplate',
			},
			{
				displayName: 'HTML Template',
				name: 'html',
				typeOptions: {
					editor: 'htmlEditor',
				},
				type: 'string',
				default: placeholder,
				noDataExpression: true,
				description: 'HTML template to render',
				displayOptions: {
					show: {
						operation: ['generateHtmlTemplate'],
					},
				},
			},
			{
				displayName:
					'<b>Tips</b>: Type ctrl+space for completions. Use <code>{{ }}</code> for expressions and <code>&lt;style&gt;</code> tags for CSS. JS in <code>&lt;script&gt;</code> tags is included but not executed in n8n.',
				name: 'notice',
				type: 'notice',
				default: '',
				displayOptions: {
					show: {
						operation: ['generateHtmlTemplate'],
					},
				},
			},
			{
				displayName: 'Source Data',
				name: 'sourceData',
				type: 'options',
				options: [
					{
						name: 'Binary',
						value: 'binary',
					},
					{
						name: 'JSON',
						value: 'json',
					},
				],
				default: 'json',
				description: 'If HTML should be read from binary or JSON data',
				displayOptions: {
					show: {
						operation: ['extractHtmlContent'],
					},
				},
			},
			{
				displayName: 'Input Binary Field',
				name: 'dataPropertyName',
				type: 'string',
				requiresDataPath: 'single',
				displayOptions: {
					show: {
						operation: ['extractHtmlContent'],
						sourceData: ['binary'],
					},
				},
				default: 'data',
				required: true,
				hint: 'The name of the input binary field containing the file to be extracted',
			},
			{
				displayName: 'JSON Property',
				name: 'dataPropertyName',
				type: 'string',
				requiresDataPath: 'single',
				displayOptions: {
					show: {
						operation: ['extractHtmlContent'],
						sourceData: ['json'],
					},
				},
				default: 'data',
				required: true,
				description:
					'Name of the JSON property in which the HTML to extract the data from can be found. The property can either contain a string or an array of strings.',
			},
			// Version 1 starts with an empty collection ...
			{
				...extractionValuesCollection,
				displayOptions: {
					show: {
						operation: ['extractHtmlContent'],
						'@version': [1],
					},
				},
			},
			// ... while later versions pre-populate one empty extraction entry.
			{
				...extractionValuesCollection,
				default: {
					values: [
						{
							key: '',
							cssSelector: '',
							returnValue: 'text',
							returnArray: false,
						},
					],
				},
				displayOptions: {
					show: {
						operation: ['extractHtmlContent'],
						'@version': [{ _cnd: { gt: 1 } }],
					},
				},
			},
			{
				displayName: 'Options',
				name: 'options',
				type: 'collection',
				placeholder: 'Add option',
				default: {},
				displayOptions: {
					show: {
						operation: ['extractHtmlContent'],
					},
				},
				options: [
					{
						displayName: 'Trim Values',
						name: 'trimValues',
						type: 'boolean',
						default: true,
						description:
							'Whether to remove automatically all spaces and newlines from the beginning and end of the values',
					},
					{
						displayName: 'Clean Up Text',
						name: 'cleanUpText',
						type: 'boolean',
						default: true,
						description:
							'Whether to remove leading and trailing whitespaces, line breaks (newlines) and condense multiple consecutive whitespaces into a single space',
					},
				],
			},
			// ----------------------------------
			//       convertToHtmlTable
			// ----------------------------------
			{
				displayName: 'Options',
				name: 'options',
				type: 'collection',
				placeholder: 'Add option',
				default: {},
				displayOptions: {
					show: {
						operation: ['convertToHtmlTable'],
					},
				},
				options: [
					{
						displayName: 'Capitalize Headers',
						name: 'capitalize',
						type: 'boolean',
						default: false,
						description: 'Whether to capitalize the headers',
					},
					{
						displayName: 'Custom Styling',
						name: 'customStyling',
						type: 'boolean',
						default: false,
						description: 'Whether to use custom styling',
					},
					{
						displayName: 'Caption',
						name: 'caption',
						type: 'string',
						default: '',
						description: 'Caption to add to the table',
					},
					{
						displayName: 'Table Attributes',
						name: 'tableAttributes',
						type: 'string',
						default: '',
						description: 'Attributes to attach to the table',
						placeholder: 'e.g. style="padding:10px"',
					},
					{
						displayName: 'Header Attributes',
						name: 'headerAttributes',
						type: 'string',
						default: '',
						description: 'Attributes to attach to the table header',
						placeholder: 'e.g. style="padding:10px"',
					},
					{
						displayName: 'Row Attributes',
						name: 'rowAttributes',
						type: 'string',
						default: '',
						description: 'Attributes to attach to the table row',
						placeholder: 'e.g. style="padding:10px"',
					},
					{
						displayName: 'Cell Attributes',
						name: 'cellAttributes',
						type: 'string',
						default: '',
						description: 'Attributes to attach to the table cell',
						placeholder: 'e.g. style="padding:10px"',
					},
				],
			},
		],
	};

	/**
	 * Runs the operation selected on the node.
	 *
	 * - `convertToHtmlTable` (only when there is at least one input item): builds
	 *   one HTML table from all items and returns a single item `{ table }` paired
	 *   with every input item.
	 * - `generateHtmlTemplate`: per item, substitutes every resolvable found by
	 *   `getResolvables` in the template with its evaluated value.
	 * - `extractHtmlContent`: per item, loads the HTML (from a JSON property or a
	 *   binary field) and emits one output item per HTML string with the
	 *   configured extraction values.
	 *
	 * @returns A single output branch containing the produced items.
	 * @throws NodeOperationError when the configured JSON property is undefined
	 * on an item and the node is not set to continue on fail.
	 */
	async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
		const items = this.getInputData();
		const operation = this.getNodeParameter('operation', 0);
		const nodeVersion = this.getNode().typeVersion;

		if (operation === 'convertToHtmlTable' && items.length) {
			let table = '';

			const options = this.getNodeParameter('options', 0);

			let tableStyle = '';
			let headerStyle = '';
			let cellStyle = '';

			// Built-in inline styles are applied unless the user opted into custom styling.
			if (!options.customStyling) {
				tableStyle = "style='border-spacing:0; font-family:helvetica,arial,sans-serif'";
				headerStyle =
					"style='margin:0; padding:7px 20px 7px 0px; border-bottom:1px solid #eee; text-align:left; color:#888; font-weight:normal'";
				cellStyle = "style='margin:0; padding:7px 20px 7px 0px; border-bottom:1px solid #eee'";
			}

			const tableAttributes = (options.tableAttributes as string) || '';
			const headerAttributes = (options.headerAttributes as string) || '';

			const itemsData: IDataObject[] = [];
			// Union of all keys across items; a Set keeps first-seen order and removes duplicates.
			const itemsKeys = new Set<string>();

			for (const entry of items) {
				itemsData.push(entry.json);

				for (const key of Object.keys(entry.json)) {
					itemsKeys.add(key);
				}
			}

			const headers = Array.from(itemsKeys);

			table += `<table ${tableStyle} ${tableAttributes}>`;

			if (options.caption) {
				table += `<caption>${options.caption}</caption>`;
			}

			table += `<thead ${headerStyle} ${headerAttributes}>`;
			table += '<tr>';
			table += headers
				.map((header) => '<th>' + capitalizeHeader(header, options.capitalize as boolean) + '</th>')
				.join('');
			table += '</tr>';
			table += '</thead>';

			table += '<tbody>';
			itemsData.forEach((entry, entryIndex) => {
				// Row and cell attributes are read per item index.
				const rowsAttributes = this.getNodeParameter(
					'options.rowAttributes',
					entryIndex,
					'',
				) as string;

				table += `<tr  ${rowsAttributes}>`;

				const cellsAttributes = this.getNodeParameter(
					'options.cellAttributes',
					entryIndex,
					'',
				) as string;

				table += headers
					.map((header) => {
						let td = `<td ${cellStyle} ${cellsAttributes}>`;

						if (typeof entry[header] === 'boolean') {
							// Booleans are rendered as a checkbox instead of "true"/"false".
							const isChecked = entry[header] ? 'checked="checked"' : '';
							td += `<input type="checkbox" ${isChecked}/>`;
						} else {
							// NOTE(review): the value is concatenated as-is — it is not HTML-escaped,
							// and a key missing on this item renders as "undefined".
							td += entry[header];
						}
						td += '</td>';
						return td;
					})
					.join('');
				table += '</tr>';
			});

			table += '</tbody>';
			table += '</table>';

			return [
				[
					{
						json: { table },
						// The single output item is derived from every input item.
						pairedItem: items.map((_item, index) => ({
							item: index,
						})),
					},
				],
			];
		}

		let item: INodeExecutionData;
		const returnData: INodeExecutionData[] = [];
		for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
			try {
				if (operation === 'generateHtmlTemplate') {
					// ----------------------------------
					//       generateHtmlTemplate
					// ----------------------------------

					let html = this.getNodeParameter('html', itemIndex) as string;

					for (const resolvable of getResolvables(html)) {
						const resolved = this.evaluateExpression(resolvable, itemIndex) as string;
						// A replacer function is used so that `$&`, `$1`, `$$` etc. occurring in
						// the evaluated value are inserted literally instead of being interpreted
						// as replacement patterns by String.prototype.replace.
						html = html.replace(resolvable, () => resolved);
					}

					const result = this.helpers.constructExecutionMetaData(
						this.helpers.returnJsonArray({ html }),
						{
							itemData: { item: itemIndex },
						},
					);

					returnData.push(...result);
				} else if (operation === 'extractHtmlContent') {
					// ----------------------------------
					//         extractHtmlContent
					// ----------------------------------

					const dataPropertyName = this.getNodeParameter('dataPropertyName', itemIndex);
					const extractionValues = this.getNodeParameter(
						'extractionValues',
						itemIndex,
					) as IDataObject;
					const options = this.getNodeParameter('options', itemIndex, {});
					const sourceData = this.getNodeParameter('sourceData', itemIndex) as string;

					item = items[itemIndex];

					let htmlArray: string[] | string = [];
					if (sourceData === 'json') {
						if (nodeVersion === 1) {
							// Version 1 looks the property up via a sanitized key.
							const key = sanitizeDataPathKey(item.json, dataPropertyName);
							if (item.json[key] === undefined) {
								throw new NodeOperationError(
									this.getNode(),
									`No property named "${dataPropertyName}" exists!`,
									{ itemIndex },
								);
							}
							htmlArray = item.json[key] as string;
						} else {
							// Later versions resolve the property with lodash `get`.
							const value = get(item.json, dataPropertyName);
							if (value === undefined) {
								throw new NodeOperationError(
									this.getNode(),
									`No property named "${dataPropertyName}" exists!`,
									{ itemIndex },
								);
							}
							htmlArray = value as string;
						}
					} else {
						this.helpers.assertBinaryData(itemIndex, dataPropertyName);
						const binaryDataBuffer = await this.helpers.getBinaryDataBuffer(
							itemIndex,
							dataPropertyName,
						);
						htmlArray = binaryDataBuffer.toString('utf-8');
					}

					// Convert it always to array that it works with a string or an array of strings
					if (!Array.isArray(htmlArray)) {
						htmlArray = [htmlArray];
					}

					for (const html of htmlArray) {
						const $ = cheerio.load(html);

						const newItem: INodeExecutionData = {
							json: {},
							pairedItem: {
								item: itemIndex,
							},
						};

						// Iterate over all the defined values which should be extracted
						let htmlElement;
						for (const valueData of extractionValues.values as IValueData[]) {
							htmlElement = $(valueData.cssSelector);

							if (valueData.returnArray) {
								// An array should be returned so iterate over one
								// value at a time
								newItem.json[valueData.key] = [];
								htmlElement.each((_, el) => {
									(newItem.json[valueData.key] as Array<string | undefined>).push(
										getValue($(el), valueData, options, nodeVersion),
									);
								});
							} else {
								// One single value should be returned
								newItem.json[valueData.key] = getValue(
									htmlElement,
									valueData,
									options,
									nodeVersion,
								);
							}
						}
						returnData.push(newItem);
					}
				}
			} catch (error) {
				if (this.continueOnFail()) {
					// Report the failure as data for this item and keep processing the rest.
					returnData.push({
						json: {
							error: error.message,
						},
						pairedItem: {
							item: itemIndex,
						},
					});
					continue;
				}

				throw error;
			}
		}

		return [returnData];
	}
}
htmlArray = binaryDataBuffer.toString('utf-8'); } // Convert it always to array that it works with a string or an array of strings if (!Array.isArray(htmlArray)) { htmlArray = [htmlArray]; } for (const html of htmlArray) { const $ = cheerio.load(html); const newItem: INodeExecutionData = { json: {}, pairedItem: { item: itemIndex, }, }; // Iterate over all the defined values which should be extracted let htmlElement; for (const valueData of extractionValues.values as IValueData[]) { htmlElement = $(valueData.cssSelector); if (valueData.returnArray) { // An array should be returned so iterate over one // value at a time newItem.json[valueData.key] = []; htmlElement.each((_, el) => { (newItem.json[valueData.key] as Array).push( getValue($(el), valueData, options, nodeVersion), ); }); } else { // One single value should be returned newItem.json[valueData.key] = getValue( htmlElement, valueData, options, nodeVersion, ); } } returnData.push(newItem); } } } catch (error) { if (this.continueOnFail()) { returnData.push({ json: { error: error.message, }, pairedItem: { item: itemIndex, }, }); continue; } throw error; } } return [returnData]; } }