n8n/packages/@n8n/nodes-langchain/nodes/chains/ChainSummarization/V2/ChainSummarizationV2.node.ts

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

421 lines
12 KiB
TypeScript
Raw Normal View History

feat: AI nodes usability fixes + Summarization Chain V2 (#7949) Fixes: - Refactor connection snapping when dragging and enable it also for non-main connection types - Fix propagation of errors from sub-nodes - Fix chat scrolling when sending/receiving messages - Prevent empty chat messages - Fix sub-node selected styles - Fix output names text overflow Usability improvements: - Auto-add manual chat trigger for agents & chain nodes - Various labels and description updates - Make the output parser input optional for Basic LLM Chain - Summarization Chain V2 with a simplified document loader & text chunking mode #### How to test the change: Example workflow showcasing different operation mode of the new summarization chain: [Summarization_V2.json](https://github.com/n8n-io/n8n/files/13599901/Summarization_V2.json) ## Issues fixed Include links to Github issue or Community forum post or **Linear ticket**: > Important in order to close automatically and provide context to reviewers - https://www.notion.so/n8n/David-Langchain-Posthog-notes-7a9294938420403095f4508f1a21d31d - https://linear.app/n8n/issue/N8N-7070/ux-fixes-batch - https://linear.app/n8n/issue/N8N-7071/ai-sub-node-bugs ## Review / Merge checklist - [x] PR title and summary are descriptive. **Remember, the title automatically goes into the changelog. Use `(no-changelog)` otherwise.** ([conventions](https://github.com/n8n-io/n8n/blob/master/.github/pull_request_title_conventions.md)) - [x] [Docs updated](https://github.com/n8n-io/n8n-docs) or follow-up ticket created. - [ ] Tests included. > A bug is not considered fixed, unless a test is added to prevent it from happening again. A feature is not complete without tests. 
> > *(internal)* You can use Slack commands to trigger [e2e tests](https://www.notion.so/n8n/How-to-use-Test-Instances-d65f49dfc51f441ea44367fb6f67eb0a?pvs=4#a39f9e5ba64a48b58a71d81c837e8227) or [deploy test instance](https://www.notion.so/n8n/How-to-use-Test-Instances-d65f49dfc51f441ea44367fb6f67eb0a?pvs=4#f6a177d32bde4b57ae2da0b8e454bfce) or [deploy early access version on Cloud](https://www.notion.so/n8n/Cloudbot-3dbe779836004972b7057bc989526998?pvs=4#fef2d36ab02247e1a0f65a74f6fb534e). --------- Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> Co-authored-by: Elias Meire <elias@meire.dev>
2023-12-08 04:42:32 -08:00
import { NodeConnectionType } from 'n8n-workflow';
import type {
INodeTypeBaseDescription,
IExecuteFunctions,
INodeExecutionData,
INodeType,
INodeTypeDescription,
IDataObject,
} from 'n8n-workflow';
import { loadSummarizationChain } from 'langchain/chains';
import type { BaseLanguageModel } from 'langchain/dist/base_language';
import type { Document } from 'langchain/document';
import type { TextSplitter } from 'langchain/text_splitter';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { N8nJsonLoader } from '../../../../utils/N8nJsonLoader';
import { N8nBinaryLoader } from '../../../../utils/N8nBinaryLoader';
import { getTemplateNoticeField } from '../../../../utils/sharedFields';
import { REFINE_PROMPT_TEMPLATE, DEFAULT_PROMPT_TEMPLATE } from '../prompt';
import { getChainPromptsArgs } from '../helpers';
/**
 * Builds the list of input connections for the node from its current parameters.
 *
 * The main input and a required language-model input are always present. A
 * required document input is added when `operationMode` is `documentLoader`;
 * otherwise an optional text-splitter input is added when `chunkingMode` is
 * `advanced`.
 *
 * The source text of this function is embedded into the node's `inputs`
 * expression through `getInputs.toString()`, so the body is kept self-contained
 * and free of comments; helpers declared outside of it would presumably not be
 * available where that expression is evaluated.
 *
 * @param parameters Current node parameters (`operationMode` and `chunkingMode` are read).
 * @returns The input connection descriptors, in display order.
 */
function getInputs(parameters: IDataObject) {
	const inputs = [
		{ displayName: '', type: NodeConnectionType.Main },
		{
			displayName: 'Model',
			maxConnections: 1,
			type: NodeConnectionType.AiLanguageModel,
			required: true,
		},
	];

	const usesDocumentLoader = parameters?.operationMode === 'documentLoader';
	const usesSplitterNode = parameters?.chunkingMode === 'advanced';

	if (usesDocumentLoader) {
		inputs.push({
			displayName: 'Document',
			type: NodeConnectionType.AiDocument,
			required: true,
			maxConnections: 1,
		});
	} else if (usesSplitterNode) {
		inputs.push({
			displayName: 'Text Splitter',
			type: NodeConnectionType.AiTextSplitter,
			required: false,
			maxConnections: 1,
		});
	}

	return inputs;
}
/**
 * Version 2 of the Summarization Chain node.
 *
 * Summarizes each incoming item with a LangChain summarization chain. The
 * documents to summarize come either from the item itself (JSON or binary) or
 * from a connected document sub-node, depending on the `operationMode` parameter.
 */
export class ChainSummarizationV2 implements INodeType {
	description: INodeTypeDescription;

	/**
	 * @param baseDescription Shared description fields that this version extends
	 * with its own version number, inputs, outputs and properties.
	 */
	constructor(baseDescription: INodeTypeBaseDescription) {
		this.description = {
			...baseDescription,
			version: [2],
			defaults: {
				name: 'Summarization Chain',
				color: '#909298',
			},
			// The inputs depend on the node parameters, so they are expressed as an
			// expression that inlines the source of `getInputs` and calls it.
			// eslint-disable-next-line n8n-nodes-base/node-class-description-inputs-wrong-regular-node
			inputs: `={{ ((parameter) => { ${getInputs.toString()}; return getInputs(parameter) })($parameter) }}`,
			outputs: [NodeConnectionType.Main],
			credentials: [],
			properties: [
				getTemplateNoticeField(1951),
				{
					displayName: 'Data to Summarize',
					name: 'operationMode',
					noDataExpression: true,
					type: 'options',
					description: 'How to pass data into the summarization chain',
					default: 'nodeInputJson',
					options: [
						{
							name: 'Use Node Input (JSON)',
							value: 'nodeInputJson',
							description: 'Summarize the JSON data coming into this node from the previous one',
						},
						{
							name: 'Use Node Input (Binary)',
							value: 'nodeInputBinary',
							description: 'Summarize the binary data coming into this node from the previous one',
						},
						{
							name: 'Use Document Loader',
							value: 'documentLoader',
							description: 'Use a loader sub-node with more configuration options',
						},
					],
				},
				{
					displayName: 'Chunking Strategy',
					name: 'chunkingMode',
					noDataExpression: true,
					type: 'options',
					description: 'Chunk splitting strategy',
					default: 'simple',
					options: [
						{
							name: 'Simple (Define Below)',
							value: 'simple',
						},
						{
							name: 'Advanced',
							value: 'advanced',
							description: 'Use a splitter sub-node with more configuration options',
						},
					],
					displayOptions: {
						show: {
							'/operationMode': ['nodeInputJson', 'nodeInputBinary'],
						},
					},
				},
				{
					displayName: 'Characters Per Chunk',
					name: 'chunkSize',
					description:
						'Controls the max size (in terms of number of characters) of the final document chunk',
					type: 'number',
					default: 1000,
					displayOptions: {
						show: {
							'/chunkingMode': ['simple'],
						},
					},
				},
				{
					displayName: 'Chunk Overlap (Characters)',
					name: 'chunkOverlap',
					type: 'number',
					description: 'Specifies how much characters overlap there should be between chunks',
					default: 200,
					displayOptions: {
						show: {
							'/chunkingMode': ['simple'],
						},
					},
				},
				{
					displayName: 'Options',
					name: 'options',
					type: 'collection',
					default: {},
					placeholder: 'Add Option',
					options: [
						{
							displayName: 'Input Data Field Name',
							name: 'binaryDataKey',
							type: 'string',
							default: 'data',
							description:
								'The name of the field in the agent or chains input that contains the binary file to be processed',
							displayOptions: {
								show: {
									'/operationMode': ['nodeInputBinary'],
								},
							},
						},
						{
							displayName: 'Summarization Method and Prompts',
							name: 'summarizationMethodAndPrompts',
							type: 'fixedCollection',
							default: {
								values: {
									summarizationMethod: 'map_reduce',
									prompt: DEFAULT_PROMPT_TEMPLATE,
									combineMapPrompt: DEFAULT_PROMPT_TEMPLATE,
								},
							},
							placeholder: 'Add Option',
							typeOptions: {},
							options: [
								{
									name: 'values',
									displayName: 'Values',
									values: [
										{
											displayName: 'Summarization Method',
											name: 'summarizationMethod',
											type: 'options',
											description: 'The type of summarization to run',
											default: 'map_reduce',
											options: [
												{
													name: 'Map Reduce (Recommended)',
													value: 'map_reduce',
													description:
														'Summarize each document (or chunk) individually, then summarize those summaries',
												},
												{
													name: 'Refine',
													value: 'refine',
													description:
														'Summarize the first document (or chunk). Then update that summary based on the next document (or chunk), and repeat.',
												},
												{
													name: 'Stuff',
													value: 'stuff',
													description:
														'Pass all documents (or chunks) at once. Ideal for small datasets.',
												},
											],
										},
										{
											displayName: 'Final Prompt to Combine',
											name: 'combineMapPrompt',
											type: 'string',
											hint: 'The prompt to combine individual summaries',
											displayOptions: {
												hide: {
													'/options.summarizationMethodAndPrompts.values.summarizationMethod': [
														'stuff',
														'refine',
													],
												},
											},
											default: DEFAULT_PROMPT_TEMPLATE,
											typeOptions: {
												rows: 9,
											},
										},
										{
											displayName: 'Individual Summary Prompt',
											name: 'prompt',
											type: 'string',
											default: DEFAULT_PROMPT_TEMPLATE,
											hint: 'The prompt to summarize an individual document (or chunk)',
											displayOptions: {
												hide: {
													'/options.summarizationMethodAndPrompts.values.summarizationMethod': [
														'stuff',
														'refine',
													],
												},
											},
											typeOptions: {
												rows: 9,
											},
										},
										{
											displayName: 'Prompt',
											name: 'prompt',
											type: 'string',
											default: DEFAULT_PROMPT_TEMPLATE,
											displayOptions: {
												hide: {
													'/options.summarizationMethodAndPrompts.values.summarizationMethod': [
														'refine',
														'map_reduce',
													],
												},
											},
											typeOptions: {
												rows: 9,
											},
										},
										{
											displayName: 'Subsequent (Refine) Prompt',
											name: 'refinePrompt',
											type: 'string',
											displayOptions: {
												hide: {
													'/options.summarizationMethodAndPrompts.values.summarizationMethod': [
														'stuff',
														'map_reduce',
													],
												},
											},
											default: REFINE_PROMPT_TEMPLATE,
											hint: 'The prompt to refine the summary based on the next document (or chunk)',
											typeOptions: {
												rows: 9,
											},
										},
										{
											displayName: 'Initial Prompt',
											name: 'refineQuestionPrompt',
											type: 'string',
											displayOptions: {
												hide: {
													'/options.summarizationMethodAndPrompts.values.summarizationMethod': [
														'stuff',
														'map_reduce',
													],
												},
											},
											default: DEFAULT_PROMPT_TEMPLATE,
											hint: 'The prompt for the first document (or chunk)',
											typeOptions: {
												rows: 9,
											},
										},
									],
								},
							],
						},
					],
				},
			],
		};
	}

	/**
	 * Runs the summarization chain once per input item.
	 *
	 * For every item the chain is built from that item's summarization options,
	 * the documents are obtained according to `operationMode`, and the chain
	 * result is emitted as `{ response }`.
	 *
	 * If processing an item throws and `this.continueOnFail()` is true, an item
	 * of the form `{ error: <message> }` linked to the failing input is emitted
	 * and processing moves on to the next item; otherwise the error is rethrown.
	 *
	 * @returns The output items wrapped by `prepareOutputData`.
	 * @throws Whatever the model, loaders or chain throw, unless continue-on-fail is active.
	 */
	async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
		this.logger.verbose('Executing Summarization Chain V2');

		const operationMode = this.getNodeParameter('operationMode', 0, 'nodeInputJson') as
			| 'nodeInputJson'
			| 'nodeInputBinary'
			| 'documentLoader';
		const chunkingMode = this.getNodeParameter('chunkingMode', 0, 'simple') as
			| 'simple'
			| 'advanced';

		const model = (await this.getInputConnectionData(
			NodeConnectionType.AiLanguageModel,
			0,
		)) as BaseLanguageModel;

		const items = this.getInputData();
		const returnData: INodeExecutionData[] = [];

		for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
			try {
				const summarizationMethodAndPrompts = this.getNodeParameter(
					'options.summarizationMethodAndPrompts.values',
					itemIndex,
					{},
				) as {
					prompt?: string;
					refineQuestionPrompt?: string;
					refinePrompt?: string;
					summarizationMethod: 'map_reduce' | 'stuff' | 'refine';
					combineMapPrompt?: string;
				};

				// The options collection may be empty, in which case no method is set.
				const chainArgs = getChainPromptsArgs(
					summarizationMethodAndPrompts.summarizationMethod ?? 'map_reduce',
					summarizationMethodAndPrompts,
				);
				const chain = loadSummarizationChain(model, chainArgs);
				const item = items[itemIndex];

				let processedDocuments: Document[] | undefined;

				if (operationMode === 'documentLoader') {
					// Use dedicated document loader input to load documents
					const documentInput = (await this.getInputConnectionData(
						NodeConnectionType.AiDocument,
						0,
					)) as N8nJsonLoader | N8nBinaryLoader | Array<Document<Record<string, unknown>>>;

					if (
						documentInput instanceof N8nJsonLoader ||
						documentInput instanceof N8nBinaryLoader
					) {
						processedDocuments = await documentInput.processItem(item, itemIndex);
					} else {
						// Anything else is treated as an already prepared list of documents.
						processedDocuments = documentInput;
					}
				} else if (operationMode === 'nodeInputJson' || operationMode === 'nodeInputBinary') {
					// Take the input and use binary or json loader
					let textSplitter: TextSplitter | undefined;

					switch (chunkingMode) {
						// In simple mode we use recursive character splitter with the configured sizes
						case 'simple': {
							const chunkSize = this.getNodeParameter('chunkSize', itemIndex, 1000) as number;
							const chunkOverlap = this.getNodeParameter('chunkOverlap', itemIndex, 200) as number;
							textSplitter = new RecursiveCharacterTextSplitter({ chunkOverlap, chunkSize });
							break;
						}
						// In advanced mode user can connect text splitter node so we just retrieve it
						case 'advanced': {
							textSplitter = (await this.getInputConnectionData(
								NodeConnectionType.AiTextSplitter,
								0,
							)) as TextSplitter | undefined;
							break;
						}
						default:
							break;
					}

					let processor: N8nJsonLoader | N8nBinaryLoader;
					if (operationMode === 'nodeInputBinary') {
						const binaryDataKey = this.getNodeParameter(
							'options.binaryDataKey',
							itemIndex,
							'data',
						) as string;
						processor = new N8nBinaryLoader(this, 'options.', binaryDataKey, textSplitter);
					} else {
						processor = new N8nJsonLoader(this, 'options.', textSplitter);
					}

					processedDocuments = await processor.processItem(item, itemIndex);
				}

				// An unrecognised operation mode yields no documents and therefore no output item.
				if (processedDocuments !== undefined) {
					const response = await chain.call({
						input_documents: processedDocuments,
					});
					returnData.push({ json: { response } });
				}
			} catch (error: unknown) {
				if (this.continueOnFail()) {
					const message = error instanceof Error ? error.message : String(error);
					returnData.push({ json: { error: message }, pairedItem: { item: itemIndex } });
					continue;
				}

				throw error;
			}
		}

		return await this.prepareOutputData(returnData);
	}
}