2024-10-28 03:37:23 -07:00
|
|
|
import {
|
|
|
|
type IExecuteFunctions,
|
|
|
|
type INodeExecutionData,
|
|
|
|
type ISupplyDataFunctions,
|
|
|
|
NodeOperationError,
|
|
|
|
} from 'n8n-workflow';
|
2023-11-29 03:13:55 -08:00
|
|
|
|
2024-05-24 05:43:17 -07:00
|
|
|
import type { TextSplitter } from '@langchain/textsplitters';
|
2024-03-07 02:36:36 -08:00
|
|
|
import type { Document } from '@langchain/core/documents';
|
2023-11-29 03:13:55 -08:00
|
|
|
import { JSONLoader } from 'langchain/document_loaders/fs/json';
|
|
|
|
import { TextLoader } from 'langchain/document_loaders/fs/text';
|
|
|
|
import { getMetadataFiltersValues } from './helpers';
|
|
|
|
|
|
|
|
/**
 * Builds LangChain `Document`s out of n8n items.
 *
 * Node parameters are read through the supplied execution context. The
 * `pointers` parameter name is prefixed with `optionsPrefix`; the `jsonMode`
 * and `jsonData` parameter names are not. When a text splitter is provided,
 * loaded documents are passed through it before being returned.
 */
export class N8nJsonLoader {
	constructor(
		private context: IExecuteFunctions | ISupplyDataFunctions,
		private optionsPrefix = '',
		private textSplitter?: TextSplitter,
	) {}

	/**
	 * Runs `processItem` over every item, one after another, and concatenates
	 * the resulting documents in item order.
	 *
	 * @param items Items to convert; when omitted an empty array is returned.
	 * @returns All documents produced for all items.
	 */
	async processAll(items?: INodeExecutionData[]): Promise<Document[]> {
		if (!items) return [];

		const collected: Document[] = [];
		// Items are handled sequentially so documents keep the input order.
		for (const [index, entry] of items.entries()) {
			const produced = await this.processItem(entry, index);
			collected.push(...produced);
		}

		return collected;
	}

	/**
	 * Converts a single item into documents.
	 *
	 * In `allInputData` mode the item's `json` is serialised and handed to a
	 * `JSONLoader`. In `expressionData` mode the `jsonData` parameter is used
	 * instead: an object goes to a `JSONLoader`, a string to a `TextLoader`.
	 *
	 * @param item The item to convert; a falsy item yields an empty array.
	 * @param itemIndex Index used when resolving node parameters.
	 * @returns The loaded (and optionally split) documents, each with the
	 *   values from `getMetadataFiltersValues` merged over its own metadata.
	 * @throws NodeOperationError when no loader could be chosen for the mode/data.
	 */
	async processItem(item: INodeExecutionData, itemIndex: number): Promise<Document[]> {
		const mode = this.context.getNodeParameter('jsonMode', itemIndex, 'allInputData') as
			| 'allInputData'
			| 'expressionData';

		const rawPointers = this.context.getNodeParameter(
			`${this.optionsPrefix}pointers`,
			itemIndex,
			'',
		) as string;
		// Comma-separated list; surrounding whitespace of each entry is dropped.
		const pointerList = rawPointers.split(',').map((entry) => entry.trim());
		const metadata = getMetadataFiltersValues(this.context, itemIndex) ?? [];

		if (!item) return [];

		let documentLoader: JSONLoader | TextLoader | null = null;

		switch (mode) {
			case 'allInputData': {
				const serialised = JSON.stringify(item.json);
				const blob = new Blob([serialised], { type: 'application/json' });
				documentLoader = new JSONLoader(blob, pointerList);
				break;
			}
			case 'expressionData': {
				const data = this.context.getNodeParameter('jsonData', itemIndex) as string | object;
				if (typeof data === 'string') {
					const blob = new Blob([data], { type: 'text/plain' });
					documentLoader = new TextLoader(blob);
				} else if (typeof data === 'object') {
					const blob = new Blob([JSON.stringify(data)], { type: 'application/json' });
					documentLoader = new JSONLoader(blob, pointerList);
				}
				break;
			}
			default:
				break;
		}

		if (documentLoader === null) {
			// Only reachable when neither mode branch picked a loader.
			throw new NodeOperationError(this.context.getNode(), 'Document loader is not initialized');
		}

		const loaded = await documentLoader.load();
		const docs = this.textSplitter ? await this.textSplitter.splitDocuments(loaded) : loaded;

		if (metadata) {
			for (const doc of docs) {
				// Filter values win over keys already present on the document.
				doc.metadata = { ...doc.metadata, ...metadata };
			}
		}

		return docs;
	}
}
|