mirror of
https://github.com/n8n-io/n8n.git
synced 2024-12-25 04:34:06 -08:00
✨ Add HTML Extract-Node
This commit is contained in:
parent
e1be291949
commit
50f3d9bb5a
|
@ -34,6 +34,7 @@ import {
|
|||
faClone,
|
||||
faCloud,
|
||||
faCopy,
|
||||
faCut,
|
||||
faDotCircle,
|
||||
faEdit,
|
||||
faEnvelope,
|
||||
|
@ -106,6 +107,7 @@ library.add(faCogs);
|
|||
library.add(faClone);
|
||||
library.add(faCloud);
|
||||
library.add(faCopy);
|
||||
library.add(faCut);
|
||||
library.add(faDotCircle);
|
||||
library.add(faEdit);
|
||||
library.add(faEnvelope);
|
||||
|
|
277
packages/nodes-base/nodes/HtmlExtract/HtmlExtract.node.ts
Normal file
277
packages/nodes-base/nodes/HtmlExtract/HtmlExtract.node.ts
Normal file
|
@ -0,0 +1,277 @@
|
|||
import * as cheerio from 'cheerio';
|
||||
import { IExecuteFunctions } from 'n8n-core';
|
||||
import {
|
||||
INodeExecutionData,
|
||||
INodeType,
|
||||
INodeTypeDescription,
|
||||
IDataObject,
|
||||
} from 'n8n-workflow';
|
||||
|
||||
/**
 * One extraction rule as configured in the "Extraction Values"
 * collection of the node.
 */
interface IValueData {
	/** Key under which the extracted value gets saved on the output item. */
	key: string;
	/** CSS selector which selects the element(s) to extract from. */
	cssSelector: string;
	/** Name of the extraction function to apply ("attribute", "html", "text" or "value"). */
	returnValue: string;
	/** Name of the attribute to read, used by the "attribute" extraction function. */
	attribute?: string;
	/** If true every matched element gets extracted separately into an array. */
	returnArray: boolean;
}
|
||||
|
||||
|
||||
/**
 * The extraction functions, keyed by the value of the "Return Value"
 * option of an extraction rule.
 *
 * Each function receives the current Cheerio selection plus the rule and
 * returns the extracted string, or `undefined` if nothing could be extracted.
 */
const extractFunctions: {
	[key: string]: ($: Cheerio, valueData: IValueData) => string | undefined;
} = {
	// Reads the configured attribute. If no attribute name got set there is
	// nothing sensible to return: cheerio's `attr()` hands back the whole
	// attribute map for an empty name, which is not a string and would break
	// the string handling done by the callers.
	attribute: ($: Cheerio, valueData: IValueData): string | undefined => {
		const attributeName = valueData.attribute;
		if (attributeName === undefined || attributeName === '') {
			return undefined;
		}
		return $.attr(attributeName);
	},
	// Falsy results of `html()` (like null or an empty string) become undefined
	html: ($: Cheerio): string | undefined => $.html() || undefined,
	text: ($: Cheerio): string | undefined => $.text(),
	value: ($: Cheerio): string | undefined => $.val(),
};
|
||||
|
||||
|
||||
/**
 * Simple helper function which extracts one value and applies the options.
 *
 * @param $ The Cheerio selection to extract the value from
 * @param valueData The extraction rule; its `returnValue` selects the extraction function
 * @param options The node options; values get trimmed unless `trimValues` is explicitly `false`
 * @returns The extracted (by default trimmed) value, or `undefined` if nothing got extracted
 * @throws Error if `valueData.returnValue` does not name a known extraction function
 */
function getValue($: Cheerio, valueData: IValueData, options: IDataObject): string | undefined {
	const extract = extractFunctions[valueData.returnValue];
	if (extract === undefined) {
		// Fail with a readable message instead of "... is not a function"
		throw new Error(`The return value "${valueData.returnValue}" is not supported!`);
	}

	const value = extract($, valueData);

	// Only strings can be trimmed. The typeof check also protects against
	// extractors which return a non-string value at runtime despite their
	// declared type.
	if (options.trimValues === false || typeof value !== 'string') {
		return value;
	}

	return value.trim();
}
|
||||
|
||||
|
||||
/**
 * Node which extracts values from HTML with the help of CSS selectors.
 *
 * The HTML is read either from a JSON property (a string or an array of
 * strings) or from a base64 encoded binary property of each input item.
 * Every HTML string results in one output item which contains the
 * extracted values under the configured keys.
 */
export class HtmlExtract implements INodeType {
	description: INodeTypeDescription = {
		displayName: 'HTML Extract',
		name: 'htmlExtract',
		icon: 'fa:cut',
		group: ['transform'],
		version: 1,
		subtitle: '={{$parameter["sourceData"] + ": " + $parameter["dataPropertyName"]}}',
		description: 'Extracts data from HTML',
		defaults: {
			name: 'HTML Extract',
			color: '#333377',
		},
		inputs: ['main'],
		outputs: ['main'],
		properties: [
			{
				displayName: 'Source Data',
				name: 'sourceData',
				type: 'options',
				options: [
					{
						name: 'Binary',
						value: 'binary',
					},
					{
						name: 'JSON',
						value: 'json',
					},
				],
				default: 'json',
				description: 'If HTML should be read from binary or json data.',
			},
			{
				displayName: 'Binary Property',
				name: 'dataPropertyName',
				type: 'string',
				displayOptions: {
					show: {
						sourceData: [
							'binary',
						],
					},
				},
				default: 'data',
				required: true,
				description: 'Name of the binary property in which the HTML to extract the data from can be found.',
			},
			{
				displayName: 'JSON Property',
				name: 'dataPropertyName',
				type: 'string',
				displayOptions: {
					show: {
						sourceData: [
							'json',
						],
					},
				},
				default: 'data',
				required: true,
				description: 'Name of the json property in which the HTML to extract the data from can be found.<br />The property can either contain a string or an array of strings.',
			},
			{
				displayName: 'Extraction Values',
				name: 'extractionValues',
				placeholder: 'Add Value',
				type: 'fixedCollection',
				typeOptions: {
					multipleValues: true,
				},
				description: 'The extraction values.',
				default: {},
				options: [
					{
						name: 'values',
						displayName: 'Values',
						values: [
							{
								displayName: 'Key',
								name: 'key',
								type: 'string',
								default: '',
								description: 'The key under which the extracted value should be saved.',
							},
							{
								displayName: 'CSS Selector',
								name: 'cssSelector',
								type: 'string',
								default: '',
								placeholder: '.price',
								description: 'The CSS selector to use.',
							},
							{
								displayName: 'Return Value',
								name: 'returnValue',
								type: 'options',
								options: [
									{
										name: 'Attribute',
										value: 'attribute',
										description: 'Get an attribute value like "class" from an element.',
									},
									{
										name: 'HTML',
										value: 'html',
										description: 'Get the HTML the element contains.',
									},
									{
										name: 'Text',
										value: 'text',
										description: 'Get only the text content of the element.',
									},
									{
										name: 'Value',
										value: 'value',
										description: 'Get value of an input, select or textarea.',
									},
								],
								default: 'text',
								description: 'What kind of data should be returned.',
							},
							{
								displayName: 'Attribute',
								name: 'attribute',
								type: 'string',
								displayOptions: {
									show: {
										returnValue: [
											'attribute',
										],
									},
								},
								default: '',
								placeholder: 'class',
								description: 'The name of the attribute to return the value of.',
							},
							{
								displayName: 'Return Array',
								name: 'returnArray',
								type: 'boolean',
								default: false,
								description: 'Returns the values as an array so if multiple ones get found they also get<br />returned separately. If not set all will be returned as a single string.',
							},
						],
					},
				],
			},

			{
				displayName: 'Options',
				name: 'options',
				type: 'collection',
				placeholder: 'Add Option',
				default: {},
				options: [
					{
						displayName: 'Trim Values',
						name: 'trimValues',
						type: 'boolean',
						default: true,
						description: 'Removes automatically all spaces and newlines from<br />the beginning and end of the values.',
					},
				],
			},
		],
	};


	/**
	 * Extracts the configured values from the HTML of every input item.
	 *
	 * @returns One output item per HTML string found on the input items
	 * @throws Error if the configured JSON or binary property does not exist
	 *         on an item, or if an item has no binary data at all while the
	 *         source is set to binary
	 */
	async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
		const items = this.getInputData();

		const returnData: INodeExecutionData[] = [];

		for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
			const dataPropertyName = this.getNodeParameter('dataPropertyName', itemIndex) as string;
			const extractionValues = this.getNodeParameter('extractionValues', itemIndex) as IDataObject;
			const options = this.getNodeParameter('options', itemIndex, {}) as IDataObject;
			const sourceData = this.getNodeParameter('sourceData', itemIndex) as string;

			const item = items[itemIndex];

			let htmlArray: string[] | string = [];
			if (sourceData === 'json') {
				if (item.json[dataPropertyName] === undefined) {
					throw new Error(`No property named "${dataPropertyName}" exists!`);
				}
				htmlArray = item.json[dataPropertyName] as string;
			} else {
				if (item.binary === undefined) {
					throw new Error(`The item does not contain binary data!`);
				}
				if (item.binary[dataPropertyName] === undefined) {
					throw new Error(`No property named "${dataPropertyName}" exists!`);
				}
				// Binary data is stored base64 encoded
				htmlArray = Buffer.from(item.binary[dataPropertyName].data, 'base64').toString('utf8');
			}

			// Always convert it to an array so that it works with a string or an array of strings
			if (!Array.isArray(htmlArray)) {
				htmlArray = [htmlArray];
			}

			// The parameter defaults to an empty object, so "values" does not
			// exist as long as no extraction value got added
			const valueDefinitions = (extractionValues.values as IValueData[] | undefined) || [];

			for (const html of htmlArray as string[]) {
				const $ = cheerio.load(html);

				const newItem: INodeExecutionData = {
					json: {},
				};

				// Iterate over all the defined values which should be extracted
				for (const valueData of valueDefinitions) {
					const htmlElement = $(valueData.cssSelector);

					if (valueData.returnArray === true) {
						// An array should be returned so iterate over one
						// value at a time
						const extractedValues: Array<string | undefined> = [];
						htmlElement.each((i, el) => {
							extractedValues.push(getValue($(el), valueData, options));
						});
						newItem.json[valueData.key] = extractedValues;
					} else {
						// One single value should be returned
						newItem.json[valueData.key] = getValue(htmlElement, valueData, options);
					}
				}
				returnData.push(newItem);
			}
		}

		return this.prepareOutputData(returnData);
	}
}
|
|
@ -109,6 +109,7 @@
|
|||
"dist/nodes/Google/GoogleDrive.node.js",
|
||||
"dist/nodes/Google/GoogleSheets.node.js",
|
||||
"dist/nodes/GraphQL/GraphQL.node.js",
|
||||
"dist/nodes/HtmlExtract/HtmlExtract.node.js",
|
||||
"dist/nodes/HttpRequest.node.js",
|
||||
"dist/nodes/Hubspot/Hubspot.node.js",
|
||||
"dist/nodes/If.node.js",
|
||||
|
@ -167,6 +168,7 @@
|
|||
"devDependencies": {
|
||||
"@types/aws4": "^1.5.1",
|
||||
"@types/basic-auth": "^1.1.2",
|
||||
"@types/cheerio": "^0.22.15",
|
||||
"@types/cron": "^1.6.1",
|
||||
"@types/express": "^4.16.1",
|
||||
"@types/gm": "^1.18.2",
|
||||
|
@ -189,6 +191,7 @@
|
|||
"dependencies": {
|
||||
"aws4": "^1.8.0",
|
||||
"basic-auth": "^2.0.1",
|
||||
"cheerio": "^1.0.0-rc.3",
|
||||
"cron": "^1.6.0",
|
||||
"glob-promise": "^3.4.0",
|
||||
"gm": "^1.23.1",
|
||||
|
|
Loading…
Reference in a new issue