feat(Spreadsheet File Node): Improve CSV parsing (#7448)

This adds support for
1. custom delimiters
2. a starting line and a maximum row count, to avoid having to read a large CSV all at once
3. excluding the byte-order mark (BOM)

NODE-861
#7443
This commit is contained in:
कारतोफ्फेलस्क्रिप्ट™ 2023-10-18 16:57:37 +02:00 committed by GitHub
parent d8531a53b9
commit 79f23fb939
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 273 additions and 15 deletions

View file

@ -201,6 +201,59 @@ export const optionsProperties: INodeProperties[] = [
default: true,
description: 'Whether the first row of the file contains the header names',
},
// CSV-only option: the field delimiter used when reading the file.
// Only shown when the operation is `fromFile` and the file format is `csv`;
// defaults to a comma.
{
displayName: 'Delimiter',
name: 'delimiter',
type: 'string',
displayOptions: {
show: {
'/operation': ['fromFile'],
'/fileFormat': ['csv'],
},
},
default: ',',
description: 'Set the field delimiter',
},
// CSV-only option: the line at which record handling starts.
// Only shown when the operation is `fromFile` and the file format is `csv`.
// The value is handed to the CSV parser unchanged, and the parser counts lines
// starting at 1, so the default and the minimum are 1 rather than 0.
// NOTE(review): csv-parse appears to reject `from_line <= 0` — confirm against its docs.
{
displayName: 'Starting Line',
name: 'fromLine',
type: 'number',
typeOptions: {
minValue: 1,
},
displayOptions: {
show: {
'/operation': ['fromFile'],
'/fileFormat': ['csv'],
},
},
default: 1,
description: 'Start handling records from the requested line number (the first line is 1)',
},
// CSV-only option: upper bound on the number of rows read from the file.
// Only shown when the operation is `fromFile` and the file format is `csv`.
// The default of -1 is a sentinel meaning "no limit": the node only passes a
// limit to the parser when the value is greater than -1.
{
displayName: 'Max Number of Rows to Load',
name: 'maxRowCount',
type: 'number',
displayOptions: {
show: {
'/operation': ['fromFile'],
'/fileFormat': ['csv'],
},
},
default: -1,
description:
'Stop handling records after the requested number of rows are read. Use -1 if you want to load all rows.',
},
// CSV-only toggle controlling byte-order-mark handling.
// Only shown when the operation is `fromFile` and the file format is `csv`.
// Naming note: the internal name is `enableBOM`, but switching it ON makes the
// parser detect and strip a leading BOM (hence the "Exclude" label). Off by default.
{
displayName: 'Exclude Byte Order Mark (BOM)',
name: 'enableBOM',
type: 'boolean',
displayOptions: {
show: {
'/operation': ['fromFile'],
'/fileFormat': ['csv'],
},
},
default: false,
description:
'Whether to detect and exclude the byte-order-mark from the CSV Input if present',
},
{
displayName: 'Include Empty Cells',
name: 'includeEmptyCells',

View file

@ -1,24 +1,28 @@
import path from 'path';
import type { IWorkflowBase } from 'n8n-workflow';
import * as Helpers from '@test/nodes/Helpers';
import type { WorkflowTestData } from '@test/nodes/types';
import { executeWorkflow } from '@test/nodes/ExecuteWorkflow';
import path from 'path';
describe('Execute Spreadsheet File Node', () => {
beforeEach(async () => {
await Helpers.initBinaryDataService();
});
// replace workflow json 'Read Binary File' node's filePath to local file
const workflow = Helpers.readJsonFileSync('nodes/SpreadsheetFile/test/workflow.json');
const node = workflow.nodes.find((n: any) => n.name === 'Read Binary File');
node.parameters.filePath = path.join(__dirname, 'spreadsheet.csv');
/**
 * Loads a workflow fixture from `nodes/SpreadsheetFile/test/` and points its
 * 'Read Binary File' node at a CSV fixture located next to this test file.
 *
 * @param fileName - File name of the workflow JSON fixture to load.
 * @param csvName - File name of the CSV fixture, resolved against `__dirname`.
 * @returns The loaded workflow with the node's `fileSelector` rewritten.
 * @throws Error when the fixture contains no node named 'Read Binary File'.
 */
const loadWorkflow = (fileName: string, csvName: string) => {
	const workflow = Helpers.readJsonFileSync<IWorkflowBase>(
		`nodes/SpreadsheetFile/test/${fileName}`,
	);
	const node = workflow.nodes.find((n) => n.name === 'Read Binary File');
	// Fail with a descriptive message instead of a TypeError on `undefined`
	// when a fixture is missing the expected node.
	if (!node) {
		throw new Error(`Node 'Read Binary File' not found in fixture '${fileName}'`);
	}
	// The fixture stores a bare file name; replace it with the absolute path
	// of the CSV that sits beside this test file.
	node.parameters.fileSelector = path.join(__dirname, csvName);
	return workflow;
};
const tests: WorkflowTestData[] = [
{
description: 'execute workflow.json',
input: {
workflowData: workflow,
workflowData: loadWorkflow('workflow.json', 'spreadsheet.csv'),
},
output: {
nodeData: {
@ -78,6 +82,7 @@ describe('Execute Spreadsheet File Node', () => {
},
],
],
'Read CSV with Row Limit': [[{ json: { A: '1', B: '2', C: '3' } }]],
'Write To File CSV': [
[
{
@ -149,6 +154,18 @@ describe('Execute Spreadsheet File Node', () => {
},
},
},
{
description: 'execute workflow.bom.json',
input: {
workflowData: loadWorkflow('workflow.bom.json', 'bom.csv'),
},
output: {
nodeData: {
'Edit with BOM included': [[{ json: { X: null } }]],
'Edit with BOM excluded': [[{ json: { X: '1' } }]],
},
},
},
];
const nodeTypes = Helpers.setup(tests);

View file

@ -0,0 +1,2 @@
a,b,c
1,2,3
1 a b c
2 1 2 3

View file

@ -0,0 +1,155 @@
{
"nodes": [
{
"parameters": {},
"id": "40bf604f-19f9-43e7-8bbb-74c36925f154",
"name": "When clicking \"Execute Workflow\"",
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [
-320,
1040
]
},
{
"parameters": {
"fileSelector": "bom.csv"
},
"id": "623ea890-8882-4273-973e-834652d823b5",
"name": "Read Binary File",
"type": "n8n-nodes-base.readBinaryFiles",
"typeVersion": 1,
"position": [
-100,
1040
]
},
{
"parameters": {
"fileFormat": "csv",
"options": {
"enableBOM": true
}
},
"id": "c8cca5fb-e119-4ca1-a597-4f051a7f64ea",
"name": "Exclude BOM",
"type": "n8n-nodes-base.spreadsheetFile",
"typeVersion": 2,
"position": [
120,
960
]
},
{
"parameters": {
"fileFormat": "csv",
"options": {
"enableBOM": false
}
},
"id": "56ec11dc-966b-4d06-b8c0-61475b30333d",
"name": "Include BOM",
"type": "n8n-nodes-base.spreadsheetFile",
"typeVersion": 2,
"position": [
120,
1180
]
},
{
"parameters": {
"fields": {
"values": [
{
"name": "X",
"stringValue": "={{ $json.a }}"
}
]
},
"include": "none",
"options": {}
},
"id": "6f6bccf2-d674-4774-9df9-6f6fd893bace",
"name": "Edit with BOM excluded",
"type": "n8n-nodes-base.set",
"typeVersion": 3.2,
"position": [
320,
960
]
},
{
"parameters": {
"fields": {
"values": [
{
"name": "X",
"stringValue": "={{ $json.a }}"
}
]
},
"include": "none",
"options": {}
},
"id": "27ca5cde-19cb-4bf2-9ab4-7f7e77ad01bd",
"name": "Edit with BOM included",
"type": "n8n-nodes-base.set",
"typeVersion": 3.2,
"position": [
320,
1180
]
}
],
"connections": {
"When clicking \"Execute Workflow\"": {
"main": [
[
{
"node": "Read Binary File",
"type": "main",
"index": 0
}
]
]
},
"Exclude BOM": {
"main": [
[
{
"node": "Edit with BOM excluded",
"type": "main",
"index": 0
}
]
]
},
"Include BOM": {
"main": [
[
{
"node": "Edit with BOM included",
"type": "main",
"index": 0
}
]
]
},
"Read Binary File": {
"main": [
[
{
"node": "Exclude BOM",
"type": "main",
"index": 0
},
{
"node": "Include BOM",
"type": "main",
"index": 0
}
]
]
}
}
}

View file

@ -1,7 +1,4 @@
{
"meta": {
"instanceId": "104a4d08d8897b8bdeb38aaca515021075e0bd8544c983c2bb8c86e6a8e6081c"
},
"nodes": [
{
"parameters": {},
@ -29,11 +26,11 @@
},
{
"parameters": {
"filePath": "C:\\Users\\spech\\Documents\\GitHub\\n8n-master\\packages\\nodes-base\\nodes\\SpreadsheetFile\\test\\spreadsheet.csv"
"fileSelector": "spreadsheet.csv"
},
"id": "d7620053-eb3d-43dd-b2cd-d60d9a08a9cc",
"name": "Read Binary File",
"type": "n8n-nodes-base.readBinaryFile",
"type": "n8n-nodes-base.readBinaryFiles",
"typeVersion": 1,
"position": [
840,
@ -173,6 +170,22 @@
1060,
940
]
},
{
"parameters": {
"fileFormat": "csv",
"options": {
"maxRowCount": 1
}
},
"id": "de905389-a11b-4dd8-8416-14d650804445",
"name": "Read CSV with Row Limit",
"type": "n8n-nodes-base.spreadsheetFile",
"typeVersion": 2,
"position": [
-60,
1340
]
}
],
"connections": {
@ -245,6 +258,11 @@
"node": "Read From File Read as String",
"type": "main",
"index": 0
},
{
"node": "Read CSV with Row Limit",
"type": "main",
"index": 0
}
]
]

View file

@ -1,5 +1,4 @@
/* eslint-disable n8n-nodes-base/node-filename-against-convention */
import { pipeline } from 'stream/promises';
import type {
IDataObject,
IExecuteFunctions,
@ -85,7 +84,12 @@ export class SpreadsheetFileV2 implements INodeType {
}
if (fileFormat === 'csv') {
const maxRowCount = options.maxRowCount as number;
const parser = createCSVParser({
delimiter: options.delimiter as string,
fromLine: options.fromLine as number,
bom: options.enableBOM as boolean,
to: maxRowCount > -1 ? maxRowCount : undefined,
columns: options.headerRow !== false,
onRecord: (record) => {
rows.push(record);
@ -93,9 +97,18 @@ export class SpreadsheetFileV2 implements INodeType {
});
if (binaryData.id) {
const stream = await this.helpers.getBinaryStream(binaryData.id);
await pipeline(stream, parser);
await new Promise<void>(async (resolve, reject) => {
parser.on('error', reject);
parser.on('readable', () => {
stream.unpipe(parser);
stream.destroy();
resolve();
});
stream.pipe(parser);
});
} else {
parser.write(binaryData.data, BINARY_ENCODING);
parser.end();
}
} else {
let workbook: WorkBook;