mirror of
https://github.com/n8n-io/n8n.git
synced 2025-01-07 10:57:29 -08:00
d6239d5bfb
* Update Compression node * Update Crypto node * Update DateTime node * Update EditImage node * Update EmailSend node * Update ExecuteWorkflow node * Update FTP node * Update Function node * Update FunctionItem node * Update ExecuteCommand node * Update OpenWeatherMap node * Update ReadBinaryFile node * Update ReadPdf node * Update RssFeedRead node & add URL validation * Update SpreadsheetFile node * Update Switch node * Update WriteBinaryFile node * Update Xml node * Update ActiveCampaign node * Update Airtable node * Update ApiTemplateIo node * Update Asana node * Update AwsLambda node * Update AwsSns node * Update AwsComprehend node * Update AwsRekognition node * Update AwsS3 node * Fix Error item * Update AwsSes node * Update AwsSqs node * Update Amqp node * Update Bitly node * Update Box node * Update Brandfetch node * Update CircleCi node * Update Clearbit node * Update ClickUp node * Update Cockpit node * Update CoinGecko node * Update Contentful node * Update ConvertKit node * Update Cortex node * Update CustomerIo node * Update DeepL node * Update Demio node * Update Disqus node * Update Drift node * Update Dropbox node * Update GetResponse node * Refactor & Update Ghost node * Update Github node * Update Gitlab node * Update GoogleAnalytics node * Update GoogleBooks node * Update GoogleCalendar node * Update GoogleDrive node * Update Gmail node * Update GoogleSheets node * Update GoogleSlides node * Update GoogleTasks node * Update Gotify node * Update GraphQL node * Update HackerNews node * Update Harvest node * Update HtmlExtract node * Update Hubspot node * Update Hunter node * Update Intercom node * Update Kafka node * Refactor & update Line node * Update LinkedIn node * Update Mailchimp node * Update Mandrill node * Update Matrix node * Update Mautic node * Update Medium node * Update MessageBird node * Update Mindee node * Update Mocean node * Update MondayCom node * Update MicrosoftExcel node * Update MicrosoftOneDrive node * Update 
MicrosoftOutlook node * Update Affinity node * Update Chargebee node * Update Discourse node * Update Freshdesk node * Update YouTube node * Update InvoiceNinja node * Update MailerLite node * Update Mailgun node * Update Mailjet node * Update Mattermost node * Update Nasa node * Update NextCloud node * Update OpenThesaurus node * Update Orbit node * Update PagerDuty node * Update PayPal node * Update Peekalink node * Update Phantombuster node * Update PostHog node * Update ProfitWell node * Refactor & Update Pushbullet node * Update QuickBooks node * Update Raindrop node * Update Reddit node * Update Rocketchat node * Update S3 node * Update Salesforce node * Update SendGrid node * Update SentryIo node * Update Shopify node * Update Signl4 node * Update Slack node * Update Spontit node * Update Spotify node * Update Storyblok node * Refactor & Update Strapi node * Refactor & Update Strava node * Update Taiga node * Refactor & update Tapfiliate node * Update Telegram node * Update TheHive node * Update Todoist node * Update TravisCi node * Update Trello node * Update Twilio node * Update Twist node * Update Twitter node * Update Uplead node * Update UProc node * Update Vero node * Update Webflow node * Update Wekan node * Update Wordpress node * Update Xero node * Update Yourls node * Update Zendesk node * Update ZohoCrm node * Refactor & Update Zoom node * Update Zulip node * Update Clockify node * Update MongoDb node * Update MySql node * Update MicrosoftTeams node * Update Stackby node * Refactor Discourse node * Support corner-case in Github node update * Support corner-case in Gitlab node update * Refactor & Update GoogleContacts node * Refactor Mindee node * Update Coda node * Lint fixes * Update Beeminder node * Update Google Firebase RealtimeDatabase node * Update HelpScout node * Update Mailcheck node * Update Paddle node * Update Pipedrive node * Update Pushover node * Update Segment node * Refactor & Update Vonage node * Added new conditions to warnings 
on execute batch cmd * Added keep only properties flag * Fixed code for keep only props * Added dependencies for image editing Co-authored-by: dali <servfrdali@yahoo.fr>
289 lines
7.8 KiB
TypeScript
289 lines
7.8 KiB
TypeScript
import * as cheerio from 'cheerio';
|
|
import { IExecuteFunctions } from 'n8n-core';
|
|
import {
|
|
IDataObject,
|
|
INodeExecutionData,
|
|
INodeType,
|
|
INodeTypeDescription,
|
|
NodeOperationError,
|
|
} from 'n8n-workflow';
|
|
|
|
// Type of a cheerio selection: whatever calling the imported `cheerio` function returns.
type Cheerio = ReturnType<typeof cheerio>;
|
|
|
|
/**
 * Settings of a single entry of the node's "Extraction Values" collection.
 */
interface IValueData {
	/** Name of the attribute to read; only used by the `attribute` extractor. */
	attribute?: string;
	/** CSS selector used to look up the element(s) in the loaded HTML. */
	cssSelector: string;
	/** Key into `extractFunctions` selecting which extractor is applied. */
	returnValue: string;
	/** Property name under which the extracted value is stored on the output item. */
	key: string;
	/** When `true`, one value per matched element is collected into an array. */
	returnArray: boolean;
}
|
|
|
|
|
|
/**
 * The extraction functions, keyed by the `returnValue` setting of an
 * extraction value. Each one reads a single value from a cheerio selection.
 */
const extractFunctions: {
	[key: string]: ($: Cheerio, valueData: IValueData) => string | undefined;
} = {
	// Without an attribute name there is nothing to look up. Returning early also
	// avoids calling `$.attr()` with an empty/undefined name.
	// NOTE(review): cheerio appears to return the whole attribute map (not a string)
	// in that case — confirm against the cheerio version in use.
	attribute: ($, valueData) => (valueData.attribute ? $.attr(valueData.attribute) : undefined),
	// `html()` may yield null/empty for no match; normalise that to undefined
	html: ($) => $.html() || undefined,
	text: ($) => $.text(),
	value: ($) => $.val(),
};
|
|
|
|
|
|
/**
 * Simple helper function which extracts one value from a cheerio selection
 * and applies the node options to it.
 *
 * @param $ Cheerio selection the value is read from
 * @param valueData Extraction settings; `returnValue` picks the extractor
 * @param options Node options; values get trimmed unless `trimValues` is `false`
 * @returns The extracted (and possibly trimmed) value, or `undefined` if the
 *          extractor found nothing
 * @throws Error if `valueData.returnValue` does not name a known extractor
 */
function getValue($: Cheerio, valueData: IValueData, options: IDataObject) {
	const extract = extractFunctions[valueData.returnValue];
	if (extract === undefined) {
		// Fail with a readable message instead of "... is not a function"
		throw new Error(`The return value type "${valueData.returnValue}" is not supported!`);
	}

	const value = extract($, valueData);

	// Only strings can be trimmed. Anything else is handed back untouched.
	// NOTE(review): cheerio's `.val()` presumably returns an array for
	// `<select multiple>` — confirm; such values would otherwise crash on `.trim()`.
	if (options.trimValues === false || typeof value !== 'string') {
		return value;
	}

	return value.trim();
}
|
|
|
|
|
|
/**
 * n8n node which extracts values from HTML via CSS selectors.
 *
 * The HTML is read either from a JSON property (a string or an array of
 * strings) or from a base64 encoded binary property of each input item.
 */
export class HtmlExtract implements INodeType {
	description: INodeTypeDescription = {
		displayName: 'HTML Extract',
		name: 'htmlExtract',
		icon: 'fa:cut',
		group: ['transform'],
		version: 1,
		subtitle: '={{$parameter["sourceData"] + ": " + $parameter["dataPropertyName"]}}',
		description: 'Extracts data from HTML',
		defaults: {
			name: 'HTML Extract',
			color: '#333377',
		},
		inputs: ['main'],
		outputs: ['main'],
		properties: [
			{
				displayName: 'Source Data',
				name: 'sourceData',
				type: 'options',
				options: [
					{
						name: 'Binary',
						value: 'binary',
					},
					{
						name: 'JSON',
						value: 'json',
					},
				],
				default: 'json',
				description: 'If HTML should be read from binary or json data.',
			},
			{
				displayName: 'Binary Property',
				name: 'dataPropertyName',
				type: 'string',
				displayOptions: {
					show: {
						sourceData: [
							'binary',
						],
					},
				},
				default: 'data',
				required: true,
				description: 'Name of the binary property in which the HTML to extract the data from can be found.',
			},
			{
				displayName: 'JSON Property',
				name: 'dataPropertyName',
				type: 'string',
				displayOptions: {
					show: {
						sourceData: [
							'json',
						],
					},
				},
				default: 'data',
				required: true,
				description: 'Name of the json property in which the HTML to extract the data from can be found.<br />The property can either contain a string or an array of strings.',
			},
			{
				displayName: 'Extraction Values',
				name: 'extractionValues',
				placeholder: 'Add Value',
				type: 'fixedCollection',
				typeOptions: {
					multipleValues: true,
				},
				description: 'The extraction values.',
				default: {},
				options: [
					{
						name: 'values',
						displayName: 'Values',
						values: [
							{
								displayName: 'Key',
								name: 'key',
								type: 'string',
								default: '',
								description: 'The key under which the extracted value should be saved.',
							},
							{
								displayName: 'CSS Selector',
								name: 'cssSelector',
								type: 'string',
								default: '',
								placeholder: '.price',
								description: 'The CSS selector to use.',
							},
							{
								displayName: 'Return Value',
								name: 'returnValue',
								type: 'options',
								options: [
									{
										name: 'Attribute',
										value: 'attribute',
										description: 'Get an attribute value like "class" from an element.',
									},
									{
										name: 'HTML',
										value: 'html',
										description: 'Get the HTML the element contains.',
									},
									{
										name: 'Text',
										value: 'text',
										description: 'Get only the text content of the element.',
									},
									{
										name: 'Value',
										value: 'value',
										description: 'Get value of an input, select or textarea.',
									},
								],
								default: 'text',
								description: 'What kind of data should be returned.',
							},
							{
								displayName: 'Attribute',
								name: 'attribute',
								type: 'string',
								displayOptions: {
									show: {
										returnValue: [
											'attribute',
										],
									},
								},
								default: '',
								placeholder: 'class',
								description: 'The name of the attribute to return the value off.',
							},
							{
								displayName: 'Return Array',
								name: 'returnArray',
								type: 'boolean',
								default: false,
								description: 'Returns the values as an array so if multiple ones get found they also get<br />returned separately.If not set all will be returned as a single string.',
							},
						],
					},
				],
			},

			{
				displayName: 'Options',
				name: 'options',
				type: 'collection',
				placeholder: 'Add Option',
				default: {},
				options: [
					{
						displayName: 'Trim Values',
						name: 'trimValues',
						type: 'boolean',
						default: true,
						description: 'Removes automatically all spaces and newlines from<br />the beginning and end of the values.',
					},
				],
			},
		],
	};

	/**
	 * Runs the extraction for every input item.
	 *
	 * For each HTML string found on an item, one output item is created whose
	 * `json` holds the extracted values under their configured keys.
	 *
	 * @returns The output items, passed through `prepareOutputData`
	 * @throws NodeOperationError if the configured JSON/binary property is missing
	 *         and "continue on fail" is not active; in that mode an item carrying
	 *         the error message is emitted instead.
	 */
	async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
		const items = this.getInputData();

		const returnData: INodeExecutionData[] = [];

		for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
			try {
				const dataPropertyName = this.getNodeParameter('dataPropertyName', itemIndex) as string;
				const extractionValues = this.getNodeParameter('extractionValues', itemIndex) as IDataObject;
				const options = this.getNodeParameter('options', itemIndex, {}) as IDataObject;
				const sourceData = this.getNodeParameter('sourceData', itemIndex) as string;

				const item = items[itemIndex];

				let htmlArray: string[] | string = [];
				if (sourceData === 'json') {
					if (item.json[dataPropertyName] === undefined) {
						throw new NodeOperationError(this.getNode(), `No property named "${dataPropertyName}" exists!`);
					}
					htmlArray = item.json[dataPropertyName] as string;
				} else {
					if (item.binary === undefined) {
						throw new NodeOperationError(this.getNode(), `The item does not contain binary data!`);
					}
					if (item.binary[dataPropertyName] === undefined) {
						throw new NodeOperationError(this.getNode(), `No property named "${dataPropertyName}" exists!`);
					}
					// Binary data is stored base64 encoded, decode it to the HTML text
					htmlArray = Buffer.from(item.binary[dataPropertyName].data, 'base64').toString('utf8');
				}

				// Convert it always to array that it works with a string or an array of strings
				if (!Array.isArray(htmlArray)) {
					htmlArray = [htmlArray];
				}

				// The parameter defaults to `{}`, so `values` is missing when nothing
				// got configured. Treat that as "nothing to extract" instead of crashing.
				const valuesToExtract = (extractionValues.values as IValueData[] | undefined) || [];

				for (const html of htmlArray as string[]) {
					const $ = cheerio.load(html);

					const newItem: INodeExecutionData = {
						json: {},
					};

					// Iterate over all the defined values which should be extracted
					for (const valueData of valuesToExtract) {
						const htmlElement = $(valueData.cssSelector);

						if (valueData.returnArray === true) {
							// An array should be returned so iterate over one
							// matched element at a time
							const values: Array<string | undefined> = [];
							htmlElement.each((i, el) => {
								values.push(getValue($(el), valueData, options));
							});
							newItem.json[valueData.key] = values;
						} else {
							// One single value should be returned
							newItem.json[valueData.key] = getValue(htmlElement, valueData, options);
						}
					}
					returnData.push(newItem);
				}
			} catch (error) {
				if (this.continueOnFail()) {
					// Report the failure as an item and go on with the next input item
					returnData.push({ json: { error: error.message } });
					continue;
				}
				throw error;
			}
		}

		return this.prepareOutputData(returnData);
	}
}
|