// n8n/packages/@n8n/nodes-langchain/nodes/text_splitters/TextSplitterTokenSplitter/TextSplitterTokenSplitter.node.ts

/* eslint-disable n8n-nodes-base/node-dirname-against-convention */
import {
NodeConnectionType,
type IExecuteFunctions,
type INodeType,
type INodeTypeDescription,
type SupplyData,
} from 'n8n-workflow';
import { TokenTextSplitter } from '@langchain/textsplitters';
import { logWrapper } from '../../../utils/logWrapper';
import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
/**
 * Node definition for the "Token Splitter" text splitter.
 *
 * The node declares no inputs and a single `AiTextSplitter` output. Its
 * `supplyData` hook builds a `TokenTextSplitter` from the node's
 * "Chunk Size" / "Chunk Overlap" parameters and hands it to whichever node
 * is connected to that output.
 */
export class TextSplitterTokenSplitter implements INodeType {
	description: INodeTypeDescription = {
		displayName: 'Token Splitter',
		name: 'textSplitterTokenSplitter',
		icon: 'fa:grip-lines-vertical',
		group: ['transform'],
		version: 1,
		description: 'Split text into chunks by tokens',
		defaults: {
			name: 'Token Splitter',
		},
		// Catalogue metadata: category placement and the documentation link.
		codex: {
			categories: ['AI'],
			subcategories: {
				AI: ['Text Splitters'],
			},
			resources: {
				primaryDocumentation: [
					{
						url: 'https://docs.n8n.io/integrations/builtin/cluster-nodes/sub-nodes/n8n-nodes-langchain.textsplittertokensplitter/',
					},
				],
			},
		},
		// This node only supplies data, so it has no inputs of its own.
		// eslint-disable-next-line n8n-nodes-base/node-class-description-inputs-wrong-regular-node
		inputs: [],
		// eslint-disable-next-line n8n-nodes-base/node-class-description-outputs-wrong
		outputs: [NodeConnectionType.AiTextSplitter],
		outputNames: ['Text Splitter'],
		properties: [
			// Shared notice field, built for the AiDocument connection type.
			getConnectionHintNoticeField([NodeConnectionType.AiDocument]),
			{
				displayName: 'Chunk Size',
				name: 'chunkSize',
				type: 'number',
				default: 1000,
			},
			{
				displayName: 'Chunk Overlap',
				name: 'chunkOverlap',
				type: 'number',
				default: 0,
			},
		],
	};

	/**
	 * Builds the token splitter for the given item and returns it wrapped by
	 * `logWrapper`.
	 *
	 * @param itemIndex - Index of the item whose parameter values are read.
	 * @returns Supply data whose `response` is the wrapped splitter.
	 */
	async supplyData(this: IExecuteFunctions, itemIndex: number): Promise<SupplyData> {
		this.logger.debug('Supply Data for Text Splitter');

		// Both values come straight from the node parameters declared above.
		const tokensPerChunk = this.getNodeParameter('chunkSize', itemIndex) as number;
		const overlapTokens = this.getNodeParameter('chunkOverlap', itemIndex) as number;

		const tokenSplitter = new TokenTextSplitter({
			chunkSize: tokensPerChunk,
			chunkOverlap: overlapTokens,
			// NOTE(review): both special-token options are set to 'all'; presumably
			// the allowed set takes precedence in the tokenizer — confirm.
			allowedSpecial: 'all',
			disallowedSpecial: 'all',
			encodingName: 'cl100k_base',
			keepSeparator: false,
		});

		return { response: logWrapper(tokenSplitter, this) };
	}
}