n8n/packages/@n8n/nodes-langchain/nodes/document_loaders/DocumentDefaultDataLoader/DocumentDefaultDataLoader.node.ts
oleg dcf12867b3
feat: AI nodes usability fixes + Summarization Chain V2 (#7949)
Fixes:
- Refactor connection snapping when dragging and enable it also for
non-main connection types
- Fix propagation of errors from sub-nodes
- Fix chat scrolling when sending/receiving messages
- Prevent empty chat messages
- Fix sub-node selected styles
- Fix output names text overflow

Usability improvements:
- Auto-add manual chat trigger for agents & chain nodes
- Various labels and description updates
- Make the output parser input optional for Basic LLM Chain
- Summarization Chain V2 with a simplified document loader & text
chunking mode

#### How to test the change:
Example workflow showcasing the different operation modes of the new
summarization chain:

[Summarization_V2.json](https://github.com/n8n-io/n8n/files/13599901/Summarization_V2.json)


## Issues fixed
Include links to Github issue or Community forum post or **Linear
ticket**:
> Important in order to close automatically and provide context to
reviewers
-
https://www.notion.so/n8n/David-Langchain-Posthog-notes-7a9294938420403095f4508f1a21d31d
- https://linear.app/n8n/issue/N8N-7070/ux-fixes-batch
- https://linear.app/n8n/issue/N8N-7071/ai-sub-node-bugs


## Review / Merge checklist
- [x] PR title and summary are descriptive. **Remember, the title
automatically goes into the changelog. Use `(no-changelog)` otherwise.**
([conventions](https://github.com/n8n-io/n8n/blob/master/.github/pull_request_title_conventions.md))
- [x] [Docs updated](https://github.com/n8n-io/n8n-docs) or follow-up
ticket created.
- [ ] Tests included.
> A bug is not considered fixed, unless a test is added to prevent it
from happening again. A feature is not complete without tests.
  >
> *(internal)* You can use Slack commands to trigger [e2e
tests](https://www.notion.so/n8n/How-to-use-Test-Instances-d65f49dfc51f441ea44367fb6f67eb0a?pvs=4#a39f9e5ba64a48b58a71d81c837e8227)
or [deploy test
instance](https://www.notion.so/n8n/How-to-use-Test-Instances-d65f49dfc51f441ea44367fb6f67eb0a?pvs=4#f6a177d32bde4b57ae2da0b8e454bfce)
or [deploy early access version on
Cloud](https://www.notion.so/n8n/Cloudbot-3dbe779836004972b7057bc989526998?pvs=4#fef2d36ab02247e1a0f65a74f6fb534e).

---------

Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>
Co-authored-by: Elias Meire <elias@meire.dev>
2023-12-08 13:42:32 +01:00

277 lines
6.7 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* eslint-disable n8n-nodes-base/node-dirname-against-convention */
import {
NodeConnectionType,
type IExecuteFunctions,
type INodeType,
type INodeTypeDescription,
type SupplyData,
} from 'n8n-workflow';
import type { TextSplitter } from 'langchain/text_splitter';
import { logWrapper } from '../../../utils/logWrapper';
import { N8nBinaryLoader } from '../../../utils/N8nBinaryLoader';
import { metadataFilterField } from '../../../utils/sharedFields';
// Dependencies needed under the hood for the loaders. We add them
// here only to track which dependency is used where
// import 'd3-dsv'; // for csv
import 'mammoth'; // for docx
import 'epub2'; // for epub
import 'pdf-parse'; // for pdf
import { N8nJsonLoader } from '../../../utils/N8nJsonLoader';
/**
 * Builds a `displayOptions` value whose `show` condition is keyed on the
 * `/loader` parameter path and matches any of the given loader values.
 */
function showForLoaders(...loaders: string[]) {
	return { show: { '/loader': loaders } };
}

/**
 * "Default Data Loader" node.
 *
 * Declares a single Text Splitter input (`NodeConnectionType.AiTextSplitter`)
 * and a single Document output (`NodeConnectionType.AiDocument`). The
 * `dataType` parameter selects between the JSON and the binary loader that
 * `supplyData` hands back.
 */
export class DocumentDefaultDataLoader implements INodeType {
	description: INodeTypeDescription = {
		displayName: 'Default Data Loader',
		name: 'documentDefaultDataLoader',
		icon: 'file:binary.svg',
		group: ['transform'],
		version: 1,
		description: 'Load data from previous step in the workflow',
		defaults: {
			name: 'Default Data Loader',
		},
		codex: {
			categories: ['AI'],
			subcategories: {
				AI: ['Document Loaders'],
			},
			resources: {
				primaryDocumentation: [
					{
						url: 'https://docs.n8n.io/integrations/builtin/cluster-nodes/sub-nodes/n8n-nodes-langchain.documentdefaultdataloader/',
					},
				],
			},
		},
		// eslint-disable-next-line n8n-nodes-base/node-class-description-inputs-wrong-regular-node
		inputs: [
			{
				displayName: 'Text Splitter',
				maxConnections: 1,
				type: NodeConnectionType.AiTextSplitter,
				required: true,
			},
		],
		// eslint-disable-next-line n8n-nodes-base/node-class-description-outputs-wrong
		outputs: [NodeConnectionType.AiDocument],
		outputNames: ['Document'],
		properties: [
			// Informational banner only; carries no value.
			{
				displayName:
					'This will load data from a previous step in the workflow. <a href="/templates/1962" target="_blank">Example</a>',
				name: 'notice',
				type: 'notice',
				default: '',
			},
			// Top-level switch between the JSON and the binary parameter sets below.
			{
				displayName: 'Type of Data',
				name: 'dataType',
				type: 'options',
				default: 'json',
				required: true,
				noDataExpression: true,
				options: [
					{
						name: 'JSON',
						value: 'json',
						description: 'Process JSON data from previous step in the workflow',
					},
					{
						name: 'Binary',
						value: 'binary',
						description: 'Process binary data from previous step in the workflow',
					},
				],
			},
			// JSON only: whole input vs. a user-supplied expression.
			{
				displayName: 'Mode',
				name: 'jsonMode',
				type: 'options',
				default: 'allInputData',
				required: true,
				displayOptions: {
					show: {
						dataType: ['json'],
					},
				},
				options: [
					{
						name: 'Load All Input Data',
						value: 'allInputData',
						description: 'Use all JSON data that flows into the parent agent or chain',
					},
					{
						name: 'Load Specific Data',
						value: 'expressionData',
						description:
							'Load a subset of data, and/or data from any previous step in the workflow',
					},
				],
			},
			// Binary only: explicit file format, or detection by mime type.
			{
				displayName: 'Data Format',
				name: 'loader',
				type: 'options',
				default: 'auto',
				required: true,
				displayOptions: {
					show: {
						dataType: ['binary'],
					},
				},
				options: [
					{
						name: 'Automatically Detect by Mime Type',
						value: 'auto',
						description: 'Uses the mime type to detect the format',
					},
					{
						name: 'CSV',
						value: 'csvLoader',
						description: 'Load CSV files',
					},
					{
						name: 'Docx',
						value: 'docxLoader',
						description: 'Load Docx documents',
					},
					{
						name: 'EPub',
						value: 'epubLoader',
						description: 'Load EPub files',
					},
					{
						name: 'JSON',
						value: 'jsonLoader',
						description: 'Load JSON files',
					},
					{
						name: 'PDF',
						value: 'pdfLoader',
						description: 'Load PDF documents',
					},
					{
						name: 'Text',
						value: 'textLoader',
						description: 'Load plain text files',
					},
				],
			},
			// JSON + "Load Specific Data": the data (or expression) to load.
			{
				displayName: 'Data',
				name: 'jsonData',
				type: 'string',
				typeOptions: {
					rows: 6,
				},
				default: '',
				required: true,
				description: 'Drag and drop fields from the input pane, or use an expression',
				displayOptions: {
					show: {
						dataType: ['json'],
						jsonMode: ['expressionData'],
					},
				},
			},
			// Binary only: name of the input field holding the file.
			{
				displayName: 'Input Data Field Name',
				name: 'binaryDataKey',
				type: 'string',
				default: 'data',
				required: true,
				description:
					'The name of the field in the agent or chains input that contains the binary file to be processed',
				displayOptions: {
					show: {
						dataType: ['binary'],
					},
				},
			},
			// Optional settings; `supplyData` passes the 'options.' prefix to the loaders.
			{
				displayName: 'Options',
				name: 'options',
				type: 'collection',
				placeholder: 'Add Option',
				default: {},
				options: [
					{
						displayName: 'JSON Pointers',
						name: 'pointers',
						type: 'string',
						default: '',
						description: 'Pointers to extract from JSON, e.g. "/text" or "/text, /meta/title"',
						displayOptions: showForLoaders('jsonLoader', 'auto'),
					},
					{
						displayName: 'CSV Separator',
						name: 'separator',
						type: 'string',
						description: 'Separator to use for CSV',
						default: ',',
						displayOptions: showForLoaders('csvLoader', 'auto'),
					},
					{
						displayName: 'CSV Column',
						name: 'column',
						type: 'string',
						default: '',
						description: 'Column to extract from CSV',
						displayOptions: showForLoaders('csvLoader', 'auto'),
					},
					{
						displayName: 'Split Pages in PDF',
						description: 'Whether to split PDF pages into separate documents',
						name: 'splitPages',
						type: 'boolean',
						default: true,
						displayOptions: showForLoaders('pdfLoader', 'auto'),
					},
					{
						// Shared field definition with node-specific labels layered on top.
						...metadataFilterField,
						displayName: 'Metadata',
						description:
							'Metadata to add to each document. Could be used for filtering during retrieval',
						placeholder: 'Add property',
					},
				],
			},
		],
	};

	/**
	 * Creates the loader for the configured data type and returns it wrapped
	 * by `logWrapper`.
	 *
	 * @param itemIndex Index of the item whose node parameters are read.
	 * @returns Supply data whose `response` is the wrapped loader: an
	 * `N8nBinaryLoader` when `dataType` is `'binary'`, otherwise an
	 * `N8nJsonLoader`.
	 */
	async supplyData(this: IExecuteFunctions, itemIndex: number): Promise<SupplyData> {
		// 'json' is the fallback when the parameter is not set.
		const selectedType = this.getNodeParameter('dataType', itemIndex, 'json') as
			| 'json'
			| 'binary';

		// Whatever is connected on the Text Splitter input (connection index 0).
		const splitter = (await this.getInputConnectionData(
			NodeConnectionType.AiTextSplitter,
			0,
		)) as TextSplitter | undefined;

		// Read unconditionally (as before); only the binary loader consumes it.
		const binaryField = this.getNodeParameter('binaryDataKey', itemIndex, '') as string;

		if (selectedType === 'binary') {
			return {
				response: logWrapper(new N8nBinaryLoader(this, 'options.', binaryField, splitter), this),
			};
		}

		return {
			response: logWrapper(new N8nJsonLoader(this, 'options.', splitter), this),
		};
	}
}