n8n/packages/@n8n/nodes-langchain/nodes/chains/ChainSummarization/V2/ChainSummarizationV2.node.ts
oleg dcf12867b3
feat: AI nodes usability fixes + Summarization Chain V2 (#7949)
Fixes:
- Refactor connection snapping when dragging and enable it also for
non-main connection types
- Fix propagation of errors from sub-nodes
- Fix chat scrolling when sending/receiving messages
- Prevent empty chat messages
- Fix sub-node selected styles
- Fix output names text overflow

Usability improvements:
- Auto-add manual chat trigger for agents & chain nodes
- Various labels and description updates
- Make the output parser input optional for Basic LLM Chain
- Summarization Chain V2 with a simplified document loader & text
chunking mode

#### How to test the change:
Example workflow showcasing different operation mode of the new
summarization chain:

[Summarization_V2.json](https://github.com/n8n-io/n8n/files/13599901/Summarization_V2.json)


## Issues fixed
Include links to Github issue or Community forum post or **Linear
ticket**:
> Important in order to close automatically and provide context to
reviewers
-
https://www.notion.so/n8n/David-Langchain-Posthog-notes-7a9294938420403095f4508f1a21d31d
- https://linear.app/n8n/issue/N8N-7070/ux-fixes-batch
- https://linear.app/n8n/issue/N8N-7071/ai-sub-node-bugs


## Review / Merge checklist
- [x] PR title and summary are descriptive. **Remember, the title
automatically goes into the changelog. Use `(no-changelog)` otherwise.**
([conventions](https://github.com/n8n-io/n8n/blob/master/.github/pull_request_title_conventions.md))
- [x] [Docs updated](https://github.com/n8n-io/n8n-docs) or follow-up
ticket created.
- [ ] Tests included.
> A bug is not considered fixed, unless a test is added to prevent it
from happening again. A feature is not complete without tests.
  >
> *(internal)* You can use Slack commands to trigger [e2e
tests](https://www.notion.so/n8n/How-to-use-Test-Instances-d65f49dfc51f441ea44367fb6f67eb0a?pvs=4#a39f9e5ba64a48b58a71d81c837e8227)
or [deploy test
instance](https://www.notion.so/n8n/How-to-use-Test-Instances-d65f49dfc51f441ea44367fb6f67eb0a?pvs=4#f6a177d32bde4b57ae2da0b8e454bfce)
or [deploy early access version on
Cloud](https://www.notion.so/n8n/Cloudbot-3dbe779836004972b7057bc989526998?pvs=4#fef2d36ab02247e1a0f65a74f6fb534e).

---------

Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>
Co-authored-by: Elias Meire <elias@meire.dev>
2023-12-08 13:42:32 +01:00

421 lines
12 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { NodeConnectionType } from 'n8n-workflow';
import type {
INodeTypeBaseDescription,
IExecuteFunctions,
INodeExecutionData,
INodeType,
INodeTypeDescription,
IDataObject,
} from 'n8n-workflow';
import { loadSummarizationChain } from 'langchain/chains';
import type { BaseLanguageModel } from 'langchain/dist/base_language';
import type { Document } from 'langchain/document';
import type { TextSplitter } from 'langchain/text_splitter';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { N8nJsonLoader } from '../../../../utils/N8nJsonLoader';
import { N8nBinaryLoader } from '../../../../utils/N8nBinaryLoader';
import { getTemplateNoticeField } from '../../../../utils/sharedFields';
import { REFINE_PROMPT_TEMPLATE, DEFAULT_PROMPT_TEMPLATE } from '../prompt';
import { getChainPromptsArgs } from '../helpers';
function getInputs(parameters: IDataObject) {
const chunkingMode = parameters?.chunkingMode;
const operationMode = parameters?.operationMode;
const inputs = [
{ displayName: '', type: NodeConnectionType.Main },
{
displayName: 'Model',
maxConnections: 1,
type: NodeConnectionType.AiLanguageModel,
required: true,
},
];
if (operationMode === 'documentLoader') {
inputs.push({
displayName: 'Document',
type: NodeConnectionType.AiDocument,
required: true,
maxConnections: 1,
});
return inputs;
}
if (chunkingMode === 'advanced') {
inputs.push({
displayName: 'Text Splitter',
type: NodeConnectionType.AiTextSplitter,
required: false,
maxConnections: 1,
});
return inputs;
}
return inputs;
}
export class ChainSummarizationV2 implements INodeType {
description: INodeTypeDescription;
constructor(baseDescription: INodeTypeBaseDescription) {
this.description = {
...baseDescription,
version: [2],
defaults: {
name: 'Summarization Chain',
color: '#909298',
},
// eslint-disable-next-line n8n-nodes-base/node-class-description-inputs-wrong-regular-node
inputs: `={{ ((parameter) => { ${getInputs.toString()}; return getInputs(parameter) })($parameter) }}`,
outputs: [NodeConnectionType.Main],
credentials: [],
properties: [
getTemplateNoticeField(1951),
{
displayName: 'Data to Summarize',
name: 'operationMode',
noDataExpression: true,
type: 'options',
description: 'How to pass data into the summarization chain',
default: 'nodeInputJson',
options: [
{
name: 'Use Node Input (JSON)',
value: 'nodeInputJson',
description: 'Summarize the JSON data coming into this node from the previous one',
},
{
name: 'Use Node Input (Binary)',
value: 'nodeInputBinary',
description: 'Summarize the binary data coming into this node from the previous one',
},
{
name: 'Use Document Loader',
value: 'documentLoader',
description: 'Use a loader sub-node with more configuration options',
},
],
},
{
displayName: 'Chunking Strategy',
name: 'chunkingMode',
noDataExpression: true,
type: 'options',
description: 'Chunk splitting strategy',
default: 'simple',
options: [
{
name: 'Simple (Define Below)',
value: 'simple',
},
{
name: 'Advanced',
value: 'advanced',
description: 'Use a splitter sub-node with more configuration options',
},
],
displayOptions: {
show: {
'/operationMode': ['nodeInputJson', 'nodeInputBinary'],
},
},
},
{
displayName: 'Characters Per Chunk',
name: 'chunkSize',
description:
'Controls the max size (in terms of number of characters) of the final document chunk',
type: 'number',
default: 1000,
displayOptions: {
show: {
'/chunkingMode': ['simple'],
},
},
},
{
displayName: 'Chunk Overlap (Characters)',
name: 'chunkOverlap',
type: 'number',
description: 'Specifies how much characters overlap there should be between chunks',
default: 200,
displayOptions: {
show: {
'/chunkingMode': ['simple'],
},
},
},
{
displayName: 'Options',
name: 'options',
type: 'collection',
default: {},
placeholder: 'Add Option',
options: [
{
displayName: 'Input Data Field Name',
name: 'binaryDataKey',
type: 'string',
default: 'data',
description:
'The name of the field in the agent or chains input that contains the binary file to be processed',
displayOptions: {
show: {
'/operationMode': ['nodeInputBinary'],
},
},
},
{
displayName: 'Summarization Method and Prompts',
name: 'summarizationMethodAndPrompts',
type: 'fixedCollection',
default: {
values: {
summarizationMethod: 'map_reduce',
prompt: DEFAULT_PROMPT_TEMPLATE,
combineMapPrompt: DEFAULT_PROMPT_TEMPLATE,
},
},
placeholder: 'Add Option',
typeOptions: {},
options: [
{
name: 'values',
displayName: 'Values',
values: [
{
displayName: 'Summarization Method',
name: 'summarizationMethod',
type: 'options',
description: 'The type of summarization to run',
default: 'map_reduce',
options: [
{
name: 'Map Reduce (Recommended)',
value: 'map_reduce',
description:
'Summarize each document (or chunk) individually, then summarize those summaries',
},
{
name: 'Refine',
value: 'refine',
description:
'Summarize the first document (or chunk). Then update that summary based on the next document (or chunk), and repeat.',
},
{
name: 'Stuff',
value: 'stuff',
description:
'Pass all documents (or chunks) at once. Ideal for small datasets.',
},
],
},
{
displayName: 'Final Prompt to Combine',
name: 'combineMapPrompt',
type: 'string',
hint: 'The prompt to combine individual summaries',
displayOptions: {
hide: {
'/options.summarizationMethodAndPrompts.values.summarizationMethod': [
'stuff',
'refine',
],
},
},
default: DEFAULT_PROMPT_TEMPLATE,
typeOptions: {
rows: 9,
},
},
{
displayName: 'Individual Summary Prompt',
name: 'prompt',
type: 'string',
default: DEFAULT_PROMPT_TEMPLATE,
hint: 'The prompt to summarize an individual document (or chunk)',
displayOptions: {
hide: {
'/options.summarizationMethodAndPrompts.values.summarizationMethod': [
'stuff',
'refine',
],
},
},
typeOptions: {
rows: 9,
},
},
{
displayName: 'Prompt',
name: 'prompt',
type: 'string',
default: DEFAULT_PROMPT_TEMPLATE,
displayOptions: {
hide: {
'/options.summarizationMethodAndPrompts.values.summarizationMethod': [
'refine',
'map_reduce',
],
},
},
typeOptions: {
rows: 9,
},
},
{
displayName: 'Subsequent (Refine) Prompt',
name: 'refinePrompt',
type: 'string',
displayOptions: {
hide: {
'/options.summarizationMethodAndPrompts.values.summarizationMethod': [
'stuff',
'map_reduce',
],
},
},
default: REFINE_PROMPT_TEMPLATE,
hint: 'The prompt to refine the summary based on the next document (or chunk)',
typeOptions: {
rows: 9,
},
},
{
displayName: 'Initial Prompt',
name: 'refineQuestionPrompt',
type: 'string',
displayOptions: {
hide: {
'/options.summarizationMethodAndPrompts.values.summarizationMethod': [
'stuff',
'map_reduce',
],
},
},
default: DEFAULT_PROMPT_TEMPLATE,
hint: 'The prompt for the first document (or chunk)',
typeOptions: {
rows: 9,
},
},
],
},
],
},
],
},
],
};
}
async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
this.logger.verbose('Executing Summarization Chain V2');
const operationMode = this.getNodeParameter('operationMode', 0, 'nodeInputJson') as
| 'nodeInputJson'
| 'nodeInputBinary'
| 'documentLoader';
const chunkingMode = this.getNodeParameter('chunkingMode', 0, 'simple') as
| 'simple'
| 'advanced';
const model = (await this.getInputConnectionData(
NodeConnectionType.AiLanguageModel,
0,
)) as BaseLanguageModel;
const items = this.getInputData();
const returnData: INodeExecutionData[] = [];
for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
const summarizationMethodAndPrompts = this.getNodeParameter(
'options.summarizationMethodAndPrompts.values',
itemIndex,
{},
) as {
prompt?: string;
refineQuestionPrompt?: string;
refinePrompt?: string;
summarizationMethod: 'map_reduce' | 'stuff' | 'refine';
combineMapPrompt?: string;
};
const chainArgs = getChainPromptsArgs(
summarizationMethodAndPrompts.summarizationMethod ?? 'map_reduce',
summarizationMethodAndPrompts,
);
const chain = loadSummarizationChain(model, chainArgs);
const item = items[itemIndex];
let processedDocuments: Document[];
// Use dedicated document loader input to load documents
if (operationMode === 'documentLoader') {
const documentInput = (await this.getInputConnectionData(
NodeConnectionType.AiDocument,
0,
)) as N8nJsonLoader | Array<Document<Record<string, unknown>>>;
const isN8nLoader =
documentInput instanceof N8nJsonLoader || documentInput instanceof N8nBinaryLoader;
processedDocuments = isN8nLoader
? await documentInput.processItem(item, itemIndex)
: documentInput;
const response = await chain.call({
input_documents: processedDocuments,
});
returnData.push({ json: { response } });
}
// Take the input and use binary or json loader
if (['nodeInputJson', 'nodeInputBinary'].includes(operationMode)) {
let textSplitter: TextSplitter | undefined;
switch (chunkingMode) {
// In simple mode we use recursive character splitter with default settings
case 'simple':
const chunkSize = this.getNodeParameter('chunkSize', itemIndex, 1000) as number;
const chunkOverlap = this.getNodeParameter('chunkOverlap', itemIndex, 200) as number;
textSplitter = new RecursiveCharacterTextSplitter({ chunkOverlap, chunkSize });
break;
// In advanced mode user can connect text splitter node so we just retrieve it
case 'advanced':
textSplitter = (await this.getInputConnectionData(
NodeConnectionType.AiTextSplitter,
0,
)) as TextSplitter | undefined;
break;
default:
break;
}
let processor: N8nJsonLoader | N8nBinaryLoader;
if (operationMode === 'nodeInputBinary') {
const binaryDataKey = this.getNodeParameter(
'options.binaryDataKey',
itemIndex,
'data',
) as string;
processor = new N8nBinaryLoader(this, 'options.', binaryDataKey, textSplitter);
} else {
processor = new N8nJsonLoader(this, 'options.', textSplitter);
}
const processedItem = await processor.processItem(item, itemIndex);
const response = await chain.call({
input_documents: processedItem,
});
returnData.push({ json: { response } });
}
}
return this.prepareOutputData(returnData);
}
}