2023-11-29 03:13:55 -08:00
|
|
|
/* eslint-disable n8n-nodes-base/node-dirname-against-convention */
|
|
|
|
import {
|
|
|
|
NodeConnectionType,
|
|
|
|
type INodeType,
|
|
|
|
type INodeTypeDescription,
|
2024-10-28 03:37:23 -07:00
|
|
|
type ISupplyDataFunctions,
|
2023-11-29 03:13:55 -08:00
|
|
|
type SupplyData,
|
|
|
|
} from 'n8n-workflow';
|
2024-03-07 02:36:36 -08:00
|
|
|
import type {
|
|
|
|
RecursiveCharacterTextSplitterParams,
|
|
|
|
SupportedTextSplitterLanguage,
|
2024-05-24 05:43:17 -07:00
|
|
|
} from '@langchain/textsplitters';
|
|
|
|
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
2023-11-29 03:13:55 -08:00
|
|
|
import { logWrapper } from '../../../utils/logWrapper';
|
|
|
|
import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
|
|
|
|
|
2024-03-07 02:36:36 -08:00
|
|
|
/**
 * Languages offered by the "Split Code" option of this node.
 *
 * The list serves two purposes in this file: it is mapped into the dropdown
 * entries of the `splitCode` option, and it is used at runtime as an allow-list
 * before a language-specific splitter is requested.
 *
 * The order is preserved in the dropdown, so keep it stable.
 *
 * NOTE(review): the upstream `SupportedTextSplitterLanguage` union may contain
 * values that are not listed here (e.g. 'sol') — confirm against the installed
 * `@langchain/textsplitters` version before extending this list.
 */
const supportedLanguages: SupportedTextSplitterLanguage[] = [
	// Programming languages and schema/markup formats, in display order.
	'cpp',
	'go',
	'java',
	'js',
	'php',
	'proto',
	'python',
	'rst',
	'ruby',
	'rust',
	'scala',
	'swift',
	// Document formats.
	'markdown',
	'latex',
	'html',
];
|
2023-11-29 03:13:55 -08:00
|
|
|
/**
 * n8n sub-node that exposes a LangChain `RecursiveCharacterTextSplitter` on an
 * `AiTextSplitter` output.
 *
 * The node has no regular inputs. Its only job is to build a splitter from the
 * configured chunk size, chunk overlap and (optionally) a source language, and
 * hand it to whichever node is connected to its output.
 */
export class TextSplitterRecursiveCharacterTextSplitter implements INodeType {
	description: INodeTypeDescription = {
		displayName: 'Recursive Character Text Splitter',
		name: 'textSplitterRecursiveCharacterTextSplitter',
		icon: 'fa:grip-lines-vertical',
		group: ['transform'],
		version: 1,
		description: 'Split text into chunks by characters recursively, recommended for most use cases',
		defaults: {
			name: 'Recursive Character Text Splitter',
		},
		codex: {
			categories: ['AI'],
			subcategories: {
				AI: ['Text Splitters'],
			},
			resources: {
				primaryDocumentation: [
					{
						url: 'https://docs.n8n.io/integrations/builtin/cluster-nodes/sub-nodes/n8n-nodes-langchain.textsplitterrecursivecharactertextsplitter/',
					},
				],
			},
		},
		// This node only supplies data to another node, hence the empty input list.
		// eslint-disable-next-line n8n-nodes-base/node-class-description-inputs-wrong-regular-node
		inputs: [],
		// eslint-disable-next-line n8n-nodes-base/node-class-description-outputs-wrong
		outputs: [NodeConnectionType.AiTextSplitter],
		outputNames: ['Text Splitter'],
		properties: [
			getConnectionHintNoticeField([NodeConnectionType.AiDocument]),
			{
				displayName: 'Chunk Size',
				name: 'chunkSize',
				type: 'number',
				default: 1000,
			},
			{
				displayName: 'Chunk Overlap',
				name: 'chunkOverlap',
				type: 'number',
				default: 0,
			},
			{
				displayName: 'Options',
				name: 'options',
				placeholder: 'Add Option',
				description: 'Additional options to add',
				type: 'collection',
				default: {},
				options: [
					{
						displayName: 'Split Code',
						name: 'splitCode',
						default: 'markdown',
						type: 'options',
						// One dropdown entry per supported language; label and value are identical.
						options: supportedLanguages.map((language) => ({
							name: language,
							value: language,
						})),
					},
				],
			},
		],
	};

	/**
	 * Builds the text splitter for the given item and returns it as supply data.
	 *
	 * Reads `chunkSize`, `chunkOverlap` and the optional `options.splitCode`
	 * parameter. When `splitCode` is set and is one of `supportedLanguages`, the
	 * splitter is created through `RecursiveCharacterTextSplitter.fromLanguage`;
	 * otherwise the plain constructor is used with the generic separators below.
	 *
	 * @param itemIndex Index of the item whose parameters are evaluated.
	 * @returns Supply data whose `response` is the splitter passed through `logWrapper`.
	 */
	async supplyData(this: ISupplyDataFunctions, itemIndex: number): Promise<SupplyData> {
		this.logger.debug('Supply Data for Text Splitter');

		const chunkSize = this.getNodeParameter('chunkSize', itemIndex) as number;
		const chunkOverlap = this.getNodeParameter('chunkOverlap', itemIndex) as number;
		// `null` is the fallback when the user has not added the "Split Code" option.
		const language = this.getNodeParameter(
			'options.splitCode',
			itemIndex,
			null,
		) as SupportedTextSplitterLanguage | null;

		const splitterConfig: RecursiveCharacterTextSplitterParams = {
			chunkSize,
			chunkOverlap,
			// TODO: These are the default values, should we allow the user to change them?
			separators: ['\n\n', '\n', ' ', ''],
			// NOTE(review): separators are dropped from the chunks in both branches
			// below — confirm this is intended for language-specific splitting.
			keepSeparator: false,
		};

		// NOTE(review): presumably `fromLanguage` replaces `separators` with
		// language-specific ones — confirm against the library.
		const splitter =
			language && supportedLanguages.includes(language)
				? RecursiveCharacterTextSplitter.fromLanguage(language, splitterConfig)
				: new RecursiveCharacterTextSplitter(splitterConfig);

		return { response: logWrapper(splitter, this) };
	}
}
|