2023-11-29 03:13:55 -08:00
|
|
|
/* eslint-disable n8n-nodes-base/node-dirname-against-convention */
|
|
|
|
import {
|
|
|
|
NodeConnectionType,
|
|
|
|
type IExecuteFunctions,
|
|
|
|
type INodeType,
|
|
|
|
type INodeTypeDescription,
|
|
|
|
type SupplyData,
|
|
|
|
} from 'n8n-workflow';
|
|
|
|
|
2024-05-24 05:43:17 -07:00
|
|
|
import type { TextSplitter } from '@langchain/textsplitters';
|
2023-12-13 05:45:22 -08:00
|
|
|
|
2023-11-29 03:13:55 -08:00
|
|
|
import { logWrapper } from '../../../utils/logWrapper';
|
|
|
|
import { N8nBinaryLoader } from '../../../utils/N8nBinaryLoader';
|
|
|
|
import { getConnectionHintNoticeField, metadataFilterField } from '../../../utils/sharedFields';
|
|
|
|
|
|
|
|
// Dependencies needed underneath the hood for the loaders. We add them
|
|
|
|
// here only to track which dependency is used where
|
|
|
|
// import 'd3-dsv'; // for csv
|
|
|
|
import 'mammoth'; // for docx
|
2023-11-30 02:59:37 -08:00
|
|
|
import 'epub2'; // for epub
|
2023-11-29 03:13:55 -08:00
|
|
|
import 'pdf-parse'; // for pdf
|
|
|
|
|
|
|
|
/**
 * Builds the `displayOptions` entry that shows a property only while the
 * `loader` parameter equals the given value.
 */
function showOnlyForLoader(loaderValue: string) {
	return {
		show: {
			loader: [loaderValue],
		},
	};
}

/**
 * Hidden, deprecated document-loader node that hands a binary-data loader to
 * the `AiDocument` output.
 *
 * Per the note inside the description, its functionality was merged into
 * `DocumentDefaultDataLoader`; the node is kept (hidden) so it can still be
 * resolved by name.
 */
export class DocumentBinaryInputLoader implements INodeType {
	description: INodeTypeDescription = {
		// This node is deprecated and will be removed in the future.
		// The functionality was merged with the `DocumentJSONInputLoader` to `DocumentDefaultDataLoader`
		hidden: true,
		displayName: 'Binary Input Loader',
		name: 'documentBinaryInputLoader',
		icon: 'file:binary.svg',
		group: ['transform'],
		version: 1,
		description: 'Use binary data from a previous step in the workflow',
		defaults: { name: 'Binary Input Loader' },
		codex: {
			categories: ['AI'],
			subcategories: { AI: ['Document Loaders'] },
			resources: {
				primaryDocumentation: [
					{
						url: 'https://docs.n8n.io/integrations/builtin/cluster-nodes/sub-nodes/n8n-nodes-langchain.documentdefaultdataloader/',
					},
				],
			},
		},
		// eslint-disable-next-line n8n-nodes-base/node-class-description-inputs-wrong-regular-node
		inputs: [
			// Single, mandatory text-splitter connection; read back in supplyData().
			{
				displayName: 'Text Splitter',
				maxConnections: 1,
				type: NodeConnectionType.AiTextSplitter,
				required: true,
			},
		],
		// eslint-disable-next-line n8n-nodes-base/node-class-description-outputs-wrong
		outputs: [NodeConnectionType.AiDocument],
		outputNames: ['Document'],
		properties: [
			getConnectionHintNoticeField([NodeConnectionType.AiVectorStore]),

			// Loader selection; the loader-specific fields below key off this value.
			{
				displayName: 'Loader Type',
				name: 'loader',
				type: 'options',
				default: 'jsonLoader',
				required: true,
				options: [
					{ name: 'CSV Loader', value: 'csvLoader', description: 'Load CSV files' },
					{ name: 'Docx Loader', value: 'docxLoader', description: 'Load Docx documents' },
					{ name: 'EPub Loader', value: 'epubLoader', description: 'Load EPub files' },
					{ name: 'JSON Loader', value: 'jsonLoader', description: 'Load JSON files' },
					{ name: 'PDF Loader', value: 'pdfLoader', description: 'Load PDF documents' },
					{ name: 'Text Loader', value: 'textLoader', description: 'Load plain text files' },
				],
			},

			// Which binary property of the incoming item to read.
			{
				displayName: 'Binary Data Key',
				name: 'binaryDataKey',
				type: 'string',
				default: 'data',
				required: true,
				description: 'Name of the binary property from which to read the file buffer',
			},

			// --- Shown for the PDF loader only ---
			{
				displayName: 'Split Pages',
				name: 'splitPages',
				type: 'boolean',
				default: true,
				displayOptions: showOnlyForLoader('pdfLoader'),
			},

			// --- Shown for the CSV loader only ---
			{
				displayName: 'Column',
				name: 'column',
				type: 'string',
				default: '',
				description: 'Column to extract from CSV',
				displayOptions: showOnlyForLoader('csvLoader'),
			},
			{
				displayName: 'Separator',
				name: 'separator',
				type: 'string',
				description: 'Separator to use for CSV',
				default: ',',
				displayOptions: showOnlyForLoader('csvLoader'),
			},

			// --- Shown for the JSON loader only ---
			{
				displayName: 'Pointers',
				name: 'pointers',
				type: 'string',
				default: '',
				description: 'Pointers to extract from JSON, e.g. "/text" or "/text, /meta/title"',
				displayOptions: showOnlyForLoader('jsonLoader'),
			},

			// Optional extras: the shared metadata field, relabelled for this node.
			{
				displayName: 'Options',
				name: 'options',
				type: 'collection',
				placeholder: 'Add Option',
				default: {},
				options: [
					{
						...metadataFilterField,
						displayName: 'Metadata',
						description:
							'Metadata to add to each document. Could be used for filtering during retrieval',
						placeholder: 'Add property',
					},
				],
			},
		],
	};

	/**
	 * Creates the loader object this node supplies to its consumer.
	 *
	 * Steps, in order: emit a verbose log line, fetch the text splitter
	 * connected at input index 0, read the `binaryDataKey` parameter for item 0,
	 * construct an `N8nBinaryLoader` from them, and wrap it with `logWrapper`.
	 *
	 * @returns Supply data whose `response` is the wrapped loader.
	 */
	async supplyData(this: IExecuteFunctions): Promise<SupplyData> {
		this.logger.verbose('Supply Data for Binary Input Loader');

		const connectedSplitter = await this.getInputConnectionData(
			NodeConnectionType.AiTextSplitter,
			0,
		);
		const splitter = connectedSplitter as TextSplitter | undefined;

		const binaryKey = this.getNodeParameter('binaryDataKey', 0) as string;

		// Second constructor argument is deliberately left undefined, as in the original call.
		const loader = new N8nBinaryLoader(this, undefined, binaryKey, splitter);
		const response = logWrapper(loader, this);

		return { response };
	}
}
|