n8n/packages/@n8n/nodes-langchain/nodes/document_loaders/DocumentDefaultDataLoader/DocumentDefaultDataLoader.node.ts

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

304 lines
7.3 KiB
TypeScript
Raw Normal View History

/* eslint-disable n8n-nodes-base/node-dirname-against-convention */
import {
NodeConnectionType,
type IExecuteFunctions,
type INodeType,
type INodeTypeDescription,
type SupplyData,
} from 'n8n-workflow';
import type { TextSplitter } from '@langchain/textsplitters';
import { logWrapper } from '../../../utils/logWrapper';
import { N8nBinaryLoader } from '../../../utils/N8nBinaryLoader';
import { metadataFilterField } from '../../../utils/sharedFields';
// Dependencies needed underneath the hood for the loaders. We add them
// here only to track where what dependency is sued
// import 'd3-dsv'; // for csv
import 'mammoth'; // for docx
import 'epub2'; // for epub
import 'pdf-parse'; // for pdf
import { N8nJsonLoader } from '../../../utils/N8nJsonLoader';
export class DocumentDefaultDataLoader implements INodeType {
description: INodeTypeDescription = {
displayName: 'Default Data Loader',
name: 'documentDefaultDataLoader',
icon: 'file:binary.svg',
group: ['transform'],
version: 1,
description: 'Load data from previous step in the workflow',
defaults: {
name: 'Default Data Loader',
},
codex: {
categories: ['AI'],
subcategories: {
AI: ['Document Loaders'],
},
resources: {
primaryDocumentation: [
{
url: 'https://docs.n8n.io/integrations/builtin/cluster-nodes/sub-nodes/n8n-nodes-langchain.documentdefaultdataloader/',
},
],
},
},
// eslint-disable-next-line n8n-nodes-base/node-class-description-inputs-wrong-regular-node
inputs: [
{
displayName: 'Text Splitter',
maxConnections: 1,
type: NodeConnectionType.AiTextSplitter,
required: true,
},
],
// eslint-disable-next-line n8n-nodes-base/node-class-description-outputs-wrong
outputs: [NodeConnectionType.AiDocument],
outputNames: ['Document'],
properties: [
{
displayName:
'This will load data from a previous step in the workflow. <a href="/templates/1962" target="_blank">Example</a>',
name: 'notice',
type: 'notice',
default: '',
},
{
displayName: 'Type of Data',
name: 'dataType',
type: 'options',
default: 'json',
required: true,
noDataExpression: true,
options: [
{
name: 'JSON',
value: 'json',
description: 'Process JSON data from previous step in the workflow',
},
{
name: 'Binary',
value: 'binary',
description: 'Process binary data from previous step in the workflow',
},
],
},
{
displayName: 'Mode',
name: 'jsonMode',
type: 'options',
default: 'allInputData',
required: true,
displayOptions: {
show: {
dataType: ['json'],
},
},
options: [
{
name: 'Load All Input Data',
value: 'allInputData',
description: 'Use all JSON data that flows into the parent agent or chain',
},
{
name: 'Load Specific Data',
value: 'expressionData',
description:
'Load a subset of data, and/or data from any previous step in the workflow',
},
],
},
{
displayName: 'Mode',
name: 'binaryMode',
type: 'options',
default: 'allInputData',
required: true,
displayOptions: {
show: {
dataType: ['binary'],
},
},
options: [
{
name: 'Load All Input Data',
value: 'allInputData',
description: 'Use all Binary data that flows into the parent agent or chain',
},
{
name: 'Load Specific Data',
value: 'specificField',
description: 'Load data from a specific field in the parent agent or chain',
},
],
},
{
displayName: 'Data Format',
name: 'loader',
type: 'options',
default: 'auto',
required: true,
displayOptions: {
show: {
dataType: ['binary'],
},
},
options: [
{
name: 'Automatically Detect by Mime Type',
value: 'auto',
description: 'Uses the mime type to detect the format',
},
{
name: 'CSV',
value: 'csvLoader',
description: 'Load CSV files',
},
{
name: 'Docx',
value: 'docxLoader',
description: 'Load Docx documents',
},
{
name: 'EPub',
value: 'epubLoader',
description: 'Load EPub files',
},
{
name: 'JSON',
value: 'jsonLoader',
description: 'Load JSON files',
},
{
name: 'PDF',
value: 'pdfLoader',
description: 'Load PDF documents',
},
{
name: 'Text',
value: 'textLoader',
description: 'Load plain text files',
},
],
},
{
displayName: 'Data',
name: 'jsonData',
type: 'string',
typeOptions: {
rows: 6,
},
default: '',
required: true,
description: 'Drag and drop fields from the input pane, or use an expression',
displayOptions: {
show: {
dataType: ['json'],
jsonMode: ['expressionData'],
},
},
},
{
displayName: 'Input Data Field Name',
name: 'binaryDataKey',
type: 'string',
default: 'data',
required: true,
description:
'The name of the field in the agent or chains input that contains the binary file to be processed',
displayOptions: {
show: {
dataType: ['binary'],
},
hide: {
binaryMode: ['allInputData'],
},
},
},
{
displayName: 'Options',
name: 'options',
type: 'collection',
placeholder: 'Add Option',
default: {},
options: [
{
displayName: 'JSON Pointers',
name: 'pointers',
type: 'string',
default: '',
description: 'Pointers to extract from JSON, e.g. "/text" or "/text, /meta/title"',
displayOptions: {
show: {
'/loader': ['jsonLoader', 'auto'],
},
},
},
{
displayName: 'CSV Separator',
name: 'separator',
type: 'string',
description: 'Separator to use for CSV',
default: ',',
displayOptions: {
show: {
'/loader': ['csvLoader', 'auto'],
},
},
},
{
displayName: 'CSV Column',
name: 'column',
type: 'string',
default: '',
description: 'Column to extract from CSV',
displayOptions: {
show: {
'/loader': ['csvLoader', 'auto'],
},
},
},
{
displayName: 'Split Pages in PDF',
description: 'Whether to split PDF pages into separate documents',
name: 'splitPages',
type: 'boolean',
default: true,
displayOptions: {
show: {
'/loader': ['pdfLoader', 'auto'],
},
},
},
{
...metadataFilterField,
displayName: 'Metadata',
description:
'Metadata to add to each document. Could be used for filtering during retrieval',
placeholder: 'Add property',
},
],
},
],
};
async supplyData(this: IExecuteFunctions, itemIndex: number): Promise<SupplyData> {
const dataType = this.getNodeParameter('dataType', itemIndex, 'json') as 'json' | 'binary';
feat: AI nodes usability fixes + Summarization Chain V2 (#7949) Fixes: - Refactor connection snapping when dragging and enable it also for non-main connection types - Fix propagation of errors from sub-nodes - Fix chat scrolling when sending/receiving messages - Prevent empty chat messages - Fix sub-node selected styles - Fix output names text overflow Usability improvements: - Auto-add manual chat trigger for agents & chain nodes - Various labels and description updates - Make the output parser input optional for Basic LLM Chain - Summarization Chain V2 with a simplified document loader & text chunking mode #### How to test the change: Example workflow showcasing different operation mode of the new summarization chain: [Summarization_V2.json](https://github.com/n8n-io/n8n/files/13599901/Summarization_V2.json) ## Issues fixed Include links to Github issue or Community forum post or **Linear ticket**: > Important in order to close automatically and provide context to reviewers - https://www.notion.so/n8n/David-Langchain-Posthog-notes-7a9294938420403095f4508f1a21d31d - https://linear.app/n8n/issue/N8N-7070/ux-fixes-batch - https://linear.app/n8n/issue/N8N-7071/ai-sub-node-bugs ## Review / Merge checklist - [x] PR title and summary are descriptive. **Remember, the title automatically goes into the changelog. Use `(no-changelog)` otherwise.** ([conventions](https://github.com/n8n-io/n8n/blob/master/.github/pull_request_title_conventions.md)) - [x] [Docs updated](https://github.com/n8n-io/n8n-docs) or follow-up ticket created. - [ ] Tests included. > A bug is not considered fixed, unless a test is added to prevent it from happening again. A feature is not complete without tests. > > *(internal)* You can use Slack commands to trigger [e2e tests](https://www.notion.so/n8n/How-to-use-Test-Instances-d65f49dfc51f441ea44367fb6f67eb0a?pvs=4#a39f9e5ba64a48b58a71d81c837e8227) or [deploy test instance](https://www.notion.so/n8n/How-to-use-Test-Instances-d65f49dfc51f441ea44367fb6f67eb0a?pvs=4#f6a177d32bde4b57ae2da0b8e454bfce) or [deploy early access version on Cloud](https://www.notion.so/n8n/Cloudbot-3dbe779836004972b7057bc989526998?pvs=4#fef2d36ab02247e1a0f65a74f6fb534e). --------- Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> Co-authored-by: Elias Meire <elias@meire.dev>
2023-12-08 04:42:32 -08:00
const textSplitter = (await this.getInputConnectionData(
NodeConnectionType.AiTextSplitter,
0,
)) as TextSplitter | undefined;
const binaryDataKey = this.getNodeParameter('binaryDataKey', itemIndex, '') as string;
const processor =
dataType === 'binary'
feat: AI nodes usability fixes + Summarization Chain V2 (#7949) Fixes: - Refactor connection snapping when dragging and enable it also for non-main connection types - Fix propagation of errors from sub-nodes - Fix chat scrolling when sending/receiving messages - Prevent empty chat messages - Fix sub-node selected styles - Fix output names text overflow Usability improvements: - Auto-add manual chat trigger for agents & chain nodes - Various labels and description updates - Make the output parser input optional for Basic LLM Chain - Summarization Chain V2 with a simplified document loader & text chunking mode #### How to test the change: Example workflow showcasing different operation mode of the new summarization chain: [Summarization_V2.json](https://github.com/n8n-io/n8n/files/13599901/Summarization_V2.json) ## Issues fixed Include links to Github issue or Community forum post or **Linear ticket**: > Important in order to close automatically and provide context to reviewers - https://www.notion.so/n8n/David-Langchain-Posthog-notes-7a9294938420403095f4508f1a21d31d - https://linear.app/n8n/issue/N8N-7070/ux-fixes-batch - https://linear.app/n8n/issue/N8N-7071/ai-sub-node-bugs ## Review / Merge checklist - [x] PR title and summary are descriptive. **Remember, the title automatically goes into the changelog. Use `(no-changelog)` otherwise.** ([conventions](https://github.com/n8n-io/n8n/blob/master/.github/pull_request_title_conventions.md)) - [x] [Docs updated](https://github.com/n8n-io/n8n-docs) or follow-up ticket created. - [ ] Tests included. > A bug is not considered fixed, unless a test is added to prevent it from happening again. A feature is not complete without tests. > > *(internal)* You can use Slack commands to trigger [e2e tests](https://www.notion.so/n8n/How-to-use-Test-Instances-d65f49dfc51f441ea44367fb6f67eb0a?pvs=4#a39f9e5ba64a48b58a71d81c837e8227) or [deploy test instance](https://www.notion.so/n8n/How-to-use-Test-Instances-d65f49dfc51f441ea44367fb6f67eb0a?pvs=4#f6a177d32bde4b57ae2da0b8e454bfce) or [deploy early access version on Cloud](https://www.notion.so/n8n/Cloudbot-3dbe779836004972b7057bc989526998?pvs=4#fef2d36ab02247e1a0f65a74f6fb534e). --------- Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> Co-authored-by: Elias Meire <elias@meire.dev>
2023-12-08 04:42:32 -08:00
? new N8nBinaryLoader(this, 'options.', binaryDataKey, textSplitter)
: new N8nJsonLoader(this, 'options.', textSplitter);
return {
response: logWrapper(processor, this),
};
}
}