diff --git a/packages/core/package.json b/packages/core/package.json index 7fdd5f99af..84cd8f7c8a 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -42,6 +42,7 @@ "@sentry/node": "catalog:", "aws4": "1.11.0", "axios": "catalog:", + "chardet": "2.0.0", "concat-stream": "2.0.0", "cron": "3.1.7", "fast-glob": "catalog:", diff --git a/packages/core/src/NodeExecuteFunctions.ts b/packages/core/src/NodeExecuteFunctions.ts index 578752b3ef..38679513de 100644 --- a/packages/core/src/NodeExecuteFunctions.ts +++ b/packages/core/src/NodeExecuteFunctions.ts @@ -15,6 +15,7 @@ import type { import { ClientOAuth2 } from '@n8n/client-oauth2'; import type { AxiosError, AxiosHeaders, AxiosRequestConfig, AxiosResponse } from 'axios'; import axios from 'axios'; +import chardet from 'chardet'; import crypto, { createHmac } from 'crypto'; import FileType from 'file-type'; import FormData from 'form-data'; @@ -1050,6 +1051,10 @@ export async function getBinaryDataBuffer( return await Container.get(BinaryDataService).getAsBuffer(binaryData); } +export function detectBinaryEncoding(buffer: Buffer): string { + return chardet.detect(buffer) as string; +} + /** * Store an incoming IBinaryData & related buffer using the configured binary data manager. * diff --git a/packages/core/src/node-execution-context/execute-context.ts b/packages/core/src/node-execution-context/execute-context.ts index 954059d86d..d563881bea 100644 --- a/packages/core/src/node-execution-context/execute-context.ts +++ b/packages/core/src/node-execution-context/execute-context.ts @@ -37,6 +37,7 @@ import { getSSHTunnelFunctions, getFileSystemHelperFunctions, getCheckProcessedHelperFunctions, + detectBinaryEncoding, } from '@/NodeExecuteFunctions'; import { BaseExecuteContext } from './base-execute-context'; @@ -96,6 +97,7 @@ export class ExecuteContext extends BaseExecuteContext implements IExecuteFuncti assertBinaryData(inputData, node, itemIndex, propertyName, 0), getBinaryDataBuffer: async (itemIndex, propertyName) => await getBinaryDataBuffer(inputData, itemIndex, propertyName, 0), + detectBinaryEncoding: (buffer: Buffer) => detectBinaryEncoding(buffer), }; this.nodeHelpers = { diff --git a/packages/core/src/node-execution-context/execute-single-context.ts b/packages/core/src/node-execution-context/execute-single-context.ts index cb46ea9c91..af837a12c5 100644 --- a/packages/core/src/node-execution-context/execute-single-context.ts +++ b/packages/core/src/node-execution-context/execute-single-context.ts @@ -16,6 +16,7 @@ import { ApplicationError, createDeferredPromise, NodeConnectionType } from 'n8n // eslint-disable-next-line import/no-cycle import { assertBinaryData, + detectBinaryEncoding, getBinaryDataBuffer, getBinaryHelperFunctions, getRequestHelperFunctions, @@ -69,6 +70,7 @@ export class ExecuteSingleContext extends BaseExecuteContext implements IExecute assertBinaryData(inputData, node, itemIndex, propertyName, inputIndex), getBinaryDataBuffer: async (propertyName, inputIndex = 0) => await getBinaryDataBuffer(inputData, itemIndex, propertyName, inputIndex), + detectBinaryEncoding: (buffer) => detectBinaryEncoding(buffer), }; } diff --git a/packages/core/src/node-execution-context/supply-data-context.ts b/packages/core/src/node-execution-context/supply-data-context.ts index c3d7f45468..6d8679d75e 100644 --- a/packages/core/src/node-execution-context/supply-data-context.ts +++ b/packages/core/src/node-execution-context/supply-data-context.ts @@ -24,6 +24,7 @@ import { assertBinaryData, constructExecutionMetaData, copyInputItems, + detectBinaryEncoding, getBinaryDataBuffer, getBinaryHelperFunctions, getCheckProcessedHelperFunctions, @@ -87,6 +88,7 @@ export class SupplyDataContext extends BaseExecuteContext implements ISupplyData assertBinaryData(inputData, node, itemIndex, propertyName, 0), getBinaryDataBuffer: async (itemIndex, propertyName) => await getBinaryDataBuffer(inputData, itemIndex, propertyName, 0), + detectBinaryEncoding: (buffer: Buffer) => detectBinaryEncoding(buffer), returnJsonArray, normalizeItems, diff --git a/packages/nodes-base/nodes/Files/ExtractFromFile/ExtractFromFile.node.ts b/packages/nodes-base/nodes/Files/ExtractFromFile/ExtractFromFile.node.ts index 66f6ca2c9c..c863d84818 100644 --- a/packages/nodes-base/nodes/Files/ExtractFromFile/ExtractFromFile.node.ts +++ b/packages/nodes-base/nodes/Files/ExtractFromFile/ExtractFromFile.node.ts @@ -6,9 +6,9 @@ import type { } from 'n8n-workflow'; import { NodeConnectionType } from 'n8n-workflow'; -import * as spreadsheet from './actions/spreadsheet.operation'; import * as moveTo from './actions/moveTo.operation'; import * as pdf from './actions/pdf.operation'; +import * as spreadsheet from './actions/spreadsheet.operation'; export class ExtractFromFile implements INodeType { // eslint-disable-next-line n8n-nodes-base/node-class-description-missing-subtitle diff --git a/packages/nodes-base/nodes/Files/ExtractFromFile/actions/moveTo.operation.ts b/packages/nodes-base/nodes/Files/ExtractFromFile/actions/moveTo.operation.ts index e4f36afc06..c8b6c115a6 100644 --- a/packages/nodes-base/nodes/Files/ExtractFromFile/actions/moveTo.operation.ts +++ b/packages/nodes-base/nodes/Files/ExtractFromFile/actions/moveTo.operation.ts @@ -121,8 +121,9 @@ export async function execute( if (!value) continue; - const encoding = (options.encoding as string) || 'utf8'; const buffer = await this.helpers.getBinaryDataBuffer(itemIndex, binaryPropertyName); + const encoding = + (options.encoding as string) || (this.helpers.detectBinaryEncoding(buffer) as string); if (options.keepSource && options.keepSource !== 'binary') { newItem.json = deepCopy(item.json); diff --git a/packages/nodes-base/nodes/Files/ExtractFromFile/test/ExtractFromFile.node.test.ts b/packages/nodes-base/nodes/Files/ExtractFromFile/test/ExtractFromFile.node.test.ts new file mode 100644 index 0000000000..aebf28eb73 --- /dev/null +++ b/packages/nodes-base/nodes/Files/ExtractFromFile/test/ExtractFromFile.node.test.ts @@ -0,0 +1,6 @@ +import { getWorkflowFilenames, testWorkflows } from '@test/nodes/Helpers'; + +describe('ExtractFromFile', () => { + const workflows = getWorkflowFilenames(__dirname); + testWorkflows(workflows); +}); diff --git a/packages/nodes-base/nodes/Files/ExtractFromFile/test/workflow.non_utf8_encoding.json b/packages/nodes-base/nodes/Files/ExtractFromFile/test/workflow.non_utf8_encoding.json new file mode 100644 index 0000000000..8462d93b93 --- /dev/null +++ b/packages/nodes-base/nodes/Files/ExtractFromFile/test/workflow.non_utf8_encoding.json @@ -0,0 +1,98 @@ +{ + "nodes": [ + { + "parameters": {}, + "type": "n8n-nodes-base.manualTrigger", + "typeVersion": 1, + "position": [-200, -40], + "id": "64686c0a-64a4-4a33-9e70-038c9d23c25b", + "name": "When clicking ‘Test workflow’" + }, + { + "parameters": { + "operation": "text", + "binaryPropertyName": "myfile", + "options": {} + }, + "type": "n8n-nodes-base.extractFromFile", + "typeVersion": 1, + "position": [420, -40], + "id": "aaac18d3-1e99-4c47-9de8-2dc8bf95abd7", + "name": "Extract from File" + }, + { + "parameters": { + "assignments": { + "assignments": [ + { + "id": "39b5f05f-85c5-499a-86d6-591d6440f147", + "name": "text", + "value": "Karlovy Vary město lázní Příliš žluťoučký kůň úpěl ďábelské ódy.", + "type": "string" + } + ] + }, + "options": {} + }, + "type": "n8n-nodes-base.set", + "typeVersion": 3.4, + "position": [-20, -40], + "id": "e74c67f1-171a-42a7-be12-b6687935988f", + "name": "Edit Fields" + }, + { + "parameters": { + "operation": "toText", + "sourceProperty": "text", + "binaryPropertyName": "myfile", + "options": { + "encoding": "windows1256" + } + }, + "type": "n8n-nodes-base.convertToFile", + "typeVersion": 1.1, + "position": [180, -40], + "id": "77ddd6d4-1d75-4ad2-8301-e70213d8371e", + "name": "windows1256" + } + ], + "connections": { + "When clicking ‘Test workflow’": { + "main": [ + [ + { + "node": "Edit Fields", + "type": "main", + "index": 0 + } + ] + ] + }, + "Extract from File": { + "main": [[]] + }, + "Edit Fields": { + "main": [ + [ + { + "node": "windows1256", + "type": "main", + "index": 0 + } + ] + ] + }, + "windows1256": { + "main": [ + [ + { + "node": "Extract from File", + "type": "main", + "index": 0 + } + ] + ] + } + }, + "pinData": {} +} diff --git a/packages/workflow/src/Interfaces.ts b/packages/workflow/src/Interfaces.ts index 116d392722..c5fa0a938a 100644 --- a/packages/workflow/src/Interfaces.ts +++ b/packages/workflow/src/Interfaces.ts @@ -947,6 +947,7 @@ export type IExecuteFunctions = ExecuteFunctions.GetNodeParameterFn & ): NodeExecutionWithMetadata[]; assertBinaryData(itemIndex: number, propertyName: string): IBinaryData; getBinaryDataBuffer(itemIndex: number, propertyName: string): Promise; + detectBinaryEncoding(buffer: Buffer): string; copyInputItems(items: INodeExecutionData[], properties: string[]): IDataObject[]; }; @@ -973,6 +974,7 @@ export interface IExecuteSingleFunctions extends BaseExecutionFunctions { BinaryHelperFunctions & { assertBinaryData(propertyName: string, inputIndex?: number): IBinaryData; getBinaryDataBuffer(propertyName: string, inputIndex?: number): Promise; + detectBinaryEncoding(buffer: Buffer): string; }; } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e21c919dee..105345263c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1130,6 +1130,9 @@ importers: axios: specifier: 'catalog:' version: 1.7.4 + chardet: + specifier: 2.0.0 + version: 2.0.0 concat-stream: specifier: 2.0.0 version: 2.0.0 @@ -6833,6 +6836,9 @@ packages: chardet@0.7.0: resolution: {integrity: sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==} + chardet@2.0.0: + resolution: {integrity: sha512-xVgPpulCooDjY6zH4m9YW3jbkaBe3FKIAvF5sj5t7aBNsVl2ljIE+xwJ4iNgiDZHFQvNIpjdKdVOQvvk5ZfxbQ==} + charenc@0.0.2: resolution: {integrity: sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==} @@ -19870,6 +19876,8 @@ snapshots: chardet@0.7.0: {} + chardet@2.0.0: {} + charenc@0.0.2: {} chart.js@4.4.0: