diff --git a/packages/editor-ui/src/main.ts b/packages/editor-ui/src/main.ts
index 9cfa172e33..048a718d96 100644
--- a/packages/editor-ui/src/main.ts
+++ b/packages/editor-ui/src/main.ts
@@ -34,6 +34,7 @@ import {
faClone,
faCloud,
faCopy,
+ faCut,
faDotCircle,
faEdit,
faEnvelope,
@@ -106,6 +107,7 @@ library.add(faCogs);
library.add(faClone);
library.add(faCloud);
library.add(faCopy);
+library.add(faCut);
library.add(faDotCircle);
library.add(faEdit);
library.add(faEnvelope);
diff --git a/packages/nodes-base/nodes/HtmlExtract/HtmlExtract.node.ts b/packages/nodes-base/nodes/HtmlExtract/HtmlExtract.node.ts
new file mode 100644
index 0000000000..ddedbc0823
--- /dev/null
+++ b/packages/nodes-base/nodes/HtmlExtract/HtmlExtract.node.ts
@@ -0,0 +1,277 @@
+import * as cheerio from 'cheerio';
+import { IExecuteFunctions } from 'n8n-core';
+import {
+ INodeExecutionData,
+ INodeType,
+ INodeTypeDescription,
+ IDataObject,
+} from 'n8n-workflow';
+
+/** One user-defined extraction rule, as found in the "values" list of the "extractionValues" parameter. */
+interface IValueData {
+	key: string; // Property name under which the extracted value gets saved
+	cssSelector: string; // Selector which picks the element(s) to read from
+	returnValue: string; // Name of the extraction function to use (attribute, html, text, value)
+	attribute?: string; // Attribute to read, only used by the "attribute" extraction function
+	returnArray: boolean; // If true every matched element is returned as a separate array entry
+}
+
+// Lookup table of the extraction functions, keyed by the value of the "returnValue" option
+type ExtractFunction = ($: Cheerio, valueData: IValueData) => string | undefined;
+
+const extractFunctions: { [key: string]: ExtractFunction } = {
+	attribute: (selection, rule) => selection.attr(rule.attribute!),
+	html: (selection) => selection.html() || undefined, // null or empty markup becomes undefined
+	text: (selection) => selection.text(),
+	value: (selection) => selection.val(),
+};
+
+
+/**
+ * Extracts the value selected by "valueData.returnValue" and trims it unless "options.trimValues" is false.
+ */
+function getValue($: Cheerio, valueData: IValueData, options: IDataObject) {
+	const extract = extractFunctions[valueData.returnValue];
+	const rawValue = extract($, valueData);
+	if (rawValue !== undefined && options.trimValues !== false) {
+		return rawValue.trim();
+	}
+	return rawValue;
+}
+
+
+/**
+ * Node which extracts values from HTML with the help of CSS selectors. The HTML is read from a
+ * JSON or binary property of each input item and one output item is created per HTML document.
+ */
+export class HtmlExtract implements INodeType {
+	description: INodeTypeDescription = {
+		displayName: 'HTML Extract',
+		name: 'htmlExtract',
+		icon: 'fa:cut',
+		group: ['transform'],
+		version: 1,
+		subtitle: '={{$parameter["sourceData"] + ": " + $parameter["dataPropertyName"]}}',
+		description: 'Extracts data from HTML',
+		defaults: {
+			name: 'HTML Extract',
+			color: '#333377',
+		},
+		inputs: ['main'],
+		outputs: ['main'],
+		properties: [
+			{
+				displayName: 'Source Data',
+				name: 'sourceData',
+				type: 'options',
+				options: [
+					{
+						name: 'Binary',
+						value: 'binary',
+					},
+					{
+						name: 'JSON',
+						value: 'json',
+					},
+				],
+				default: 'json',
+				description: 'If HTML should be read from binary or json data.',
+			},
+			{
+				displayName: 'Binary Property',
+				name: 'dataPropertyName',
+				type: 'string',
+				displayOptions: {
+					show: {
+						sourceData: ['binary'],
+					},
+				},
+				default: 'data',
+				required: true,
+				description: 'Name of the binary property in which the HTML to extract the data from can be found.',
+			},
+			{
+				displayName: 'JSON Property',
+				name: 'dataPropertyName',
+				type: 'string',
+				displayOptions: {
+					show: {
+						sourceData: ['json'],
+					},
+				},
+				default: 'data',
+				required: true,
+				description: 'Name of the json property in which the HTML to extract the data from can be found.<br />The property can either contain a string or an array of strings.',
+			},
+			{
+				displayName: 'Extraction Values',
+				name: 'extractionValues',
+				placeholder: 'Add Value',
+				type: 'fixedCollection',
+				typeOptions: {
+					multipleValues: true,
+				},
+				description: 'The extraction values.',
+				default: {},
+				options: [
+					{
+						name: 'values',
+						displayName: 'Values',
+						values: [
+							{
+								displayName: 'Key',
+								name: 'key',
+								type: 'string',
+								default: '',
+								description: 'The key under which the extracted value should be saved.',
+							},
+							{
+								displayName: 'CSS Selector',
+								name: 'cssSelector',
+								type: 'string',
+								default: '',
+								placeholder: '.price',
+								description: 'The CSS selector to use.',
+							},
+							{
+								displayName: 'Return Value',
+								name: 'returnValue',
+								type: 'options',
+								options: [
+									{
+										name: 'Attribute',
+										value: 'attribute',
+										description: 'Get an attribute value like "class" from an element.',
+									},
+									{
+										name: 'HTML',
+										value: 'html',
+										description: 'Get the HTML the element contains.',
+									},
+									{
+										name: 'Text',
+										value: 'text',
+										description: 'Get only the text content of the element.',
+									},
+									{
+										name: 'Value',
+										value: 'value',
+										description: 'Get value of an input, select or textarea.',
+									},
+								],
+								default: 'text',
+								description: 'What kind of data should be returned.',
+							},
+							{
+								displayName: 'Attribute',
+								name: 'attribute',
+								type: 'string',
+								displayOptions: {
+									show: {
+										returnValue: ['attribute'],
+									},
+								},
+								default: '',
+								placeholder: 'class',
+								description: 'The name of the attribute to return the value off.',
+							},
+							{
+								displayName: 'Return Array',
+								name: 'returnArray',
+								type: 'boolean',
+								default: false,
+								description: 'Returns the values as an array so if multiple ones get found they also get<br />returned separately.If not set all will be returned as a single string.',
+							},
+						],
+					},
+				],
+			},
+			{
+				displayName: 'Options',
+				name: 'options',
+				type: 'collection',
+				placeholder: 'Add Option',
+				default: {},
+				options: [
+					{
+						displayName: 'Trim Values',
+						name: 'trimValues',
+						type: 'boolean',
+						default: true,
+						description: 'Removes automatically all spaces and newlines from<br />the beginning and end of the values.',
+					},
+				],
+			}
+		]
+	};
+
+	/**
+	 * Extracts the configured values from every HTML document found on the input items.
+	 *
+	 * @returns one output item per HTML document with the extracted values saved under their keys
+	 * @throws Error if an item does not have the configured JSON or binary property
+	 */
+	async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
+		const items = this.getInputData();
+		const returnData: INodeExecutionData[] = [];
+
+		for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
+			const dataPropertyName = this.getNodeParameter('dataPropertyName', itemIndex) as string;
+			const extractionValues = this.getNodeParameter('extractionValues', itemIndex) as IDataObject;
+			const options = this.getNodeParameter('options', itemIndex, {}) as IDataObject;
+			const sourceData = this.getNodeParameter('sourceData', itemIndex) as string;
+			const item = items[itemIndex];
+
+			let htmlArray: string[] | string = [];
+			if (sourceData === 'json') {
+				if (item.json[dataPropertyName] === undefined) {
+					throw new Error(`No property named "${dataPropertyName}" exists!`);
+				}
+				htmlArray = item.json[dataPropertyName] as string[] | string;
+			} else {
+				if (item.binary === undefined) {
+					throw new Error('The item does not contain binary data!');
+				}
+				if (item.binary[dataPropertyName] === undefined) {
+					throw new Error(`No property named "${dataPropertyName}" exists!`);
+				}
+				// The binary data is decoded from base64 into an UTF-8 string
+				htmlArray = Buffer.from(item.binary[dataPropertyName].data, 'base64').toString('utf8');
+			}
+
+			// Convert it always to an array so that it works with a string or an array of strings
+			if (!Array.isArray(htmlArray)) {
+				htmlArray = [htmlArray];
+			}
+
+			// The parameter defaults to {} so "values" is missing as long as no value got defined
+			const valuesToExtract = (extractionValues.values as IValueData[] | undefined) || [];
+
+			for (const html of htmlArray as string[]) {
+				const $ = cheerio.load(html);
+				const newItem: INodeExecutionData = {
+					json: {},
+				};
+
+				// Iterate over all the defined values which should be extracted
+				for (const valueData of valuesToExtract) {
+					const htmlElement = $(valueData.cssSelector);
+					if (valueData.returnArray === true) {
+						// An array should be returned so iterate over one value at a time
+						const values: Array<string | undefined> = [];
+						htmlElement.each((i, el) => {
+							values.push(getValue($(el), valueData, options));
+						});
+						newItem.json[valueData.key] = values;
+					} else {
+						// One single value should be returned
+						newItem.json[valueData.key] = getValue(htmlElement, valueData, options);
+					}
+				}
+				returnData.push(newItem);
+			}
+		}
+
+		return this.prepareOutputData(returnData);
+	}
+}
diff --git a/packages/nodes-base/package.json b/packages/nodes-base/package.json
index fa5665b66e..ebd64cab02 100644
--- a/packages/nodes-base/package.json
+++ b/packages/nodes-base/package.json
@@ -109,6 +109,7 @@
"dist/nodes/Google/GoogleDrive.node.js",
"dist/nodes/Google/GoogleSheets.node.js",
"dist/nodes/GraphQL/GraphQL.node.js",
+ "dist/nodes/HtmlExtract/HtmlExtract.node.js",
"dist/nodes/HttpRequest.node.js",
"dist/nodes/Hubspot/Hubspot.node.js",
"dist/nodes/If.node.js",
@@ -167,6 +168,7 @@
"devDependencies": {
"@types/aws4": "^1.5.1",
"@types/basic-auth": "^1.1.2",
+ "@types/cheerio": "^0.22.15",
"@types/cron": "^1.6.1",
"@types/express": "^4.16.1",
"@types/gm": "^1.18.2",
@@ -189,6 +191,7 @@
"dependencies": {
"aws4": "^1.8.0",
"basic-auth": "^2.0.1",
+ "cheerio": "^1.0.0-rc.3",
"cron": "^1.6.0",
"glob-promise": "^3.4.0",
"gm": "^1.23.1",