mirror of
https://github.com/n8n-io/n8n.git
synced 2024-11-15 00:54:06 -08:00
a77e8dd79e
Signed-off-by: Oleg Ivaniv <me@olegivaniv.com> Co-authored-by: Oleg Ivaniv <me@olegivaniv.com>
82 lines
2.2 KiB
TypeScript
82 lines
2.2 KiB
TypeScript
/* eslint-disable n8n-nodes-base/node-dirname-against-convention */
|
|
import {
|
|
NodeConnectionType,
|
|
type IExecuteFunctions,
|
|
type INodeType,
|
|
type INodeTypeDescription,
|
|
type SupplyData,
|
|
} from 'n8n-workflow';
|
|
import { TokenTextSplitter } from '@langchain/textsplitters';
|
|
import { logWrapper } from '../../../utils/logWrapper';
|
|
import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
|
|
|
|
/**
 * Node definition for the "Token Splitter" text splitter.
 *
 * The node declares no inputs and a single `AiTextSplitter` output. Its
 * `supplyData` hook builds a `TokenTextSplitter` from the node's
 * "Chunk Size" and "Chunk Overlap" parameters and hands it back as the
 * supplied response.
 */
export class TextSplitterTokenSplitter implements INodeType {
	description: INodeTypeDescription = {
		// Identity and presentation of the node.
		displayName: 'Token Splitter',
		name: 'textSplitterTokenSplitter',
		icon: 'fa:grip-lines-vertical',
		group: ['transform'],
		version: 1,
		description: 'Split text into chunks by tokens',
		defaults: { name: 'Token Splitter' },

		// Catalogue placement and documentation link.
		codex: {
			categories: ['AI'],
			subcategories: { AI: ['Text Splitters'] },
			resources: {
				primaryDocumentation: [
					{
						url: 'https://docs.n8n.io/integrations/builtin/cluster-nodes/sub-nodes/n8n-nodes-langchain.textsplittertokensplitter/',
					},
				],
			},
		},

		// No inputs; one text-splitter output.
		// eslint-disable-next-line n8n-nodes-base/node-class-description-inputs-wrong-regular-node
		inputs: [],
		// eslint-disable-next-line n8n-nodes-base/node-class-description-outputs-wrong
		outputs: [NodeConnectionType.AiTextSplitter],
		outputNames: ['Text Splitter'],

		properties: [
			// Notice field produced by the shared helper for the AiDocument connection type.
			getConnectionHintNoticeField([NodeConnectionType.AiDocument]),
			{
				displayName: 'Chunk Size',
				name: 'chunkSize',
				type: 'number',
				default: 1000,
			},
			{
				displayName: 'Chunk Overlap',
				name: 'chunkOverlap',
				type: 'number',
				default: 0,
			},
		],
	};

	/**
	 * Builds the token-based splitter for the given item.
	 *
	 * @param itemIndex - Index of the item whose node parameters are read.
	 * @returns Supply data whose `response` is the splitter passed through `logWrapper`.
	 */
	async supplyData(this: IExecuteFunctions, itemIndex: number): Promise<SupplyData> {
		this.logger.verbose('Supply Data for Text Splitter');

		// Parameters are read in the same order as they are declared in `properties`.
		const size = this.getNodeParameter('chunkSize', itemIndex) as number;
		const overlap = this.getNodeParameter('chunkOverlap', itemIndex) as number;

		// Encoding and special-token handling are fixed here; only the chunk
		// size and overlap come from the node's parameters.
		const tokenSplitter = new TokenTextSplitter({
			encodingName: 'cl100k_base',
			allowedSpecial: 'all',
			disallowedSpecial: 'all',
			keepSeparator: false,
			chunkSize: size,
			chunkOverlap: overlap,
		});

		const response = logWrapper(tokenSplitter, this);
		return { response };
	}
}
|