fix(Extract from File Node): Detect file encoding (#12081)

Co-authored-by: कारतोफ्फेलस्क्रिप्ट™ <aditya@netroy.in>
This commit is contained in:
Dana 2024-12-18 16:59:12 +01:00 committed by GitHub
parent bf8142b474
commit 92af245d1a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 129 additions and 2 deletions

View file

@ -42,6 +42,7 @@
"@sentry/node": "catalog:",
"aws4": "1.11.0",
"axios": "catalog:",
"chardet": "2.0.0",
"concat-stream": "2.0.0",
"cron": "3.1.7",
"fast-glob": "catalog:",

View file

@ -15,6 +15,7 @@ import type {
import { ClientOAuth2 } from '@n8n/client-oauth2';
import type { AxiosError, AxiosHeaders, AxiosRequestConfig, AxiosResponse } from 'axios';
import axios from 'axios';
import chardet from 'chardet';
import crypto, { createHmac } from 'crypto';
import FileType from 'file-type';
import FormData from 'form-data';
@ -1050,6 +1051,10 @@ export async function getBinaryDataBuffer(
return await Container.get(BinaryDataService).getAsBuffer(binaryData);
}
/**
 * Guess the character encoding of a binary buffer.
 *
 * Delegates to `chardet.detect`, whose typings allow a `null` result when no
 * candidate encoding is found. Callers use the returned value directly as an
 * encoding name, so a missing result is replaced with `'utf8'` instead of
 * being cast to `string` and leaking `null` at runtime.
 *
 * @param buffer - Raw bytes whose text encoding should be detected.
 * @returns The detected encoding name, or `'utf8'` when detection yields nothing.
 */
export function detectBinaryEncoding(buffer: Buffer): string {
	return chardet.detect(buffer) ?? 'utf8';
}
/**
* Store an incoming IBinaryData & related buffer using the configured binary data manager.
*

View file

@ -37,6 +37,7 @@ import {
getSSHTunnelFunctions,
getFileSystemHelperFunctions,
getCheckProcessedHelperFunctions,
detectBinaryEncoding,
} from '@/NodeExecuteFunctions';
import { BaseExecuteContext } from './base-execute-context';
@ -96,6 +97,7 @@ export class ExecuteContext extends BaseExecuteContext implements IExecuteFuncti
assertBinaryData(inputData, node, itemIndex, propertyName, 0),
getBinaryDataBuffer: async (itemIndex, propertyName) =>
await getBinaryDataBuffer(inputData, itemIndex, propertyName, 0),
detectBinaryEncoding: (buffer: Buffer) => detectBinaryEncoding(buffer),
};
this.nodeHelpers = {

View file

@ -16,6 +16,7 @@ import { ApplicationError, createDeferredPromise, NodeConnectionType } from 'n8n
// eslint-disable-next-line import/no-cycle
import {
assertBinaryData,
detectBinaryEncoding,
getBinaryDataBuffer,
getBinaryHelperFunctions,
getRequestHelperFunctions,
@ -69,6 +70,7 @@ export class ExecuteSingleContext extends BaseExecuteContext implements IExecute
assertBinaryData(inputData, node, itemIndex, propertyName, inputIndex),
getBinaryDataBuffer: async (propertyName, inputIndex = 0) =>
await getBinaryDataBuffer(inputData, itemIndex, propertyName, inputIndex),
detectBinaryEncoding: (buffer) => detectBinaryEncoding(buffer),
};
}

View file

@ -24,6 +24,7 @@ import {
assertBinaryData,
constructExecutionMetaData,
copyInputItems,
detectBinaryEncoding,
getBinaryDataBuffer,
getBinaryHelperFunctions,
getCheckProcessedHelperFunctions,
@ -87,6 +88,7 @@ export class SupplyDataContext extends BaseExecuteContext implements ISupplyData
assertBinaryData(inputData, node, itemIndex, propertyName, 0),
getBinaryDataBuffer: async (itemIndex, propertyName) =>
await getBinaryDataBuffer(inputData, itemIndex, propertyName, 0),
detectBinaryEncoding: (buffer: Buffer) => detectBinaryEncoding(buffer),
returnJsonArray,
normalizeItems,

View file

@ -6,9 +6,9 @@ import type {
} from 'n8n-workflow';
import { NodeConnectionType } from 'n8n-workflow';
import * as spreadsheet from './actions/spreadsheet.operation';
import * as moveTo from './actions/moveTo.operation';
import * as pdf from './actions/pdf.operation';
import * as spreadsheet from './actions/spreadsheet.operation';
export class ExtractFromFile implements INodeType {
// eslint-disable-next-line n8n-nodes-base/node-class-description-missing-subtitle

View file

@ -121,8 +121,9 @@ export async function execute(
if (!value) continue;
const encoding = (options.encoding as string) || 'utf8';
const buffer = await this.helpers.getBinaryDataBuffer(itemIndex, binaryPropertyName);
const encoding =
(options.encoding as string) || (this.helpers.detectBinaryEncoding(buffer) as string);
if (options.keepSource && options.keepSource !== 'binary') {
newItem.json = deepCopy(item.json);

View file

@ -0,0 +1,6 @@
import { getWorkflowFilenames, testWorkflows } from '@test/nodes/Helpers';
// Runs every workflow fixture located next to this test file through the shared workflow test harness.
describe('ExtractFromFile', () => {
	testWorkflows(getWorkflowFilenames(__dirname));
});

View file

@ -0,0 +1,98 @@
{
"nodes": [
{
"parameters": {},
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [-200, -40],
"id": "64686c0a-64a4-4a33-9e70-038c9d23c25b",
"name": "When clicking Test workflow"
},
{
"parameters": {
"operation": "text",
"binaryPropertyName": "myfile",
"options": {}
},
"type": "n8n-nodes-base.extractFromFile",
"typeVersion": 1,
"position": [420, -40],
"id": "aaac18d3-1e99-4c47-9de8-2dc8bf95abd7",
"name": "Extract from File"
},
{
"parameters": {
"assignments": {
"assignments": [
{
"id": "39b5f05f-85c5-499a-86d6-591d6440f147",
"name": "text",
"value": "Karlovy Vary město lázní Příliš žluťoučký kůň úpěl ďábelské ódy.",
"type": "string"
}
]
},
"options": {}
},
"type": "n8n-nodes-base.set",
"typeVersion": 3.4,
"position": [-20, -40],
"id": "e74c67f1-171a-42a7-be12-b6687935988f",
"name": "Edit Fields"
},
{
"parameters": {
"operation": "toText",
"sourceProperty": "text",
"binaryPropertyName": "myfile",
"options": {
"encoding": "windows1256"
}
},
"type": "n8n-nodes-base.convertToFile",
"typeVersion": 1.1,
"position": [180, -40],
"id": "77ddd6d4-1d75-4ad2-8301-e70213d8371e",
"name": "windows1256"
}
],
"connections": {
"When clicking Test workflow": {
"main": [
[
{
"node": "Edit Fields",
"type": "main",
"index": 0
}
]
]
},
"Extract from File": {
"main": [[]]
},
"Edit Fields": {
"main": [
[
{
"node": "windows1256",
"type": "main",
"index": 0
}
]
]
},
"windows1256": {
"main": [
[
{
"node": "Extract from File",
"type": "main",
"index": 0
}
]
]
}
},
"pinData": {}
}

View file

@ -947,6 +947,7 @@ export type IExecuteFunctions = ExecuteFunctions.GetNodeParameterFn &
): NodeExecutionWithMetadata[];
assertBinaryData(itemIndex: number, propertyName: string): IBinaryData;
getBinaryDataBuffer(itemIndex: number, propertyName: string): Promise<Buffer>;
detectBinaryEncoding(buffer: Buffer): string;
copyInputItems(items: INodeExecutionData[], properties: string[]): IDataObject[];
};
@ -973,6 +974,7 @@ export interface IExecuteSingleFunctions extends BaseExecutionFunctions {
BinaryHelperFunctions & {
assertBinaryData(propertyName: string, inputIndex?: number): IBinaryData;
getBinaryDataBuffer(propertyName: string, inputIndex?: number): Promise<Buffer>;
detectBinaryEncoding(buffer: Buffer): string;
};
}

View file

@ -1130,6 +1130,9 @@ importers:
axios:
specifier: 'catalog:'
version: 1.7.4
chardet:
specifier: 2.0.0
version: 2.0.0
concat-stream:
specifier: 2.0.0
version: 2.0.0
@ -6833,6 +6836,9 @@ packages:
chardet@0.7.0:
resolution: {integrity: sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==}
chardet@2.0.0:
resolution: {integrity: sha512-xVgPpulCooDjY6zH4m9YW3jbkaBe3FKIAvF5sj5t7aBNsVl2ljIE+xwJ4iNgiDZHFQvNIpjdKdVOQvvk5ZfxbQ==}
charenc@0.0.2:
resolution: {integrity: sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==}
@ -19870,6 +19876,8 @@ snapshots:
chardet@0.7.0: {}
chardet@2.0.0: {}
charenc@0.0.2: {}
chart.js@4.4.0: