mirror of
https://github.com/n8n-io/n8n.git
synced 2024-12-25 20:54:07 -08:00
feat(Spreadsheet File Node): Improve CSV parsing (#7448)
This adds support for:
1. custom delimiters,
2. reading offsets, to avoid having to read a large CSV all at once,
3. excluding the byte-order mark (BOM).

NODE-861, #7443
This commit is contained in:
parent
d8531a53b9
commit
79f23fb939
|
@ -201,6 +201,59 @@ export const optionsProperties: INodeProperties[] = [
|
||||||
default: true,
|
default: true,
|
||||||
description: 'Whether the first row of the file contains the header names',
|
description: 'Whether the first row of the file contains the header names',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
displayName: 'Delimiter',
|
||||||
|
name: 'delimiter',
|
||||||
|
type: 'string',
|
||||||
|
displayOptions: {
|
||||||
|
show: {
|
||||||
|
'/operation': ['fromFile'],
|
||||||
|
'/fileFormat': ['csv'],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
default: ',',
|
||||||
|
description: 'Set the field delimiter',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
displayName: 'Starting Line',
|
||||||
|
name: 'fromLine',
|
||||||
|
type: 'number',
|
||||||
|
displayOptions: {
|
||||||
|
show: {
|
||||||
|
'/operation': ['fromFile'],
|
||||||
|
'/fileFormat': ['csv'],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
default: 0,
|
||||||
|
description: 'Start handling records from the requested line number',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
displayName: 'Max Number of Rows to Load',
|
||||||
|
name: 'maxRowCount',
|
||||||
|
type: 'number',
|
||||||
|
displayOptions: {
|
||||||
|
show: {
|
||||||
|
'/operation': ['fromFile'],
|
||||||
|
'/fileFormat': ['csv'],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
default: -1,
|
||||||
|
description: 'Stop handling records after the requested number of rows are read',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
displayName: 'Exclude Byte Order Mark (BOM)',
|
||||||
|
name: 'enableBOM',
|
||||||
|
type: 'boolean',
|
||||||
|
displayOptions: {
|
||||||
|
show: {
|
||||||
|
'/operation': ['fromFile'],
|
||||||
|
'/fileFormat': ['csv'],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
default: false,
|
||||||
|
description:
|
||||||
|
'Whether to detect and exclude the byte-order-mark from the CSV Input if present',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
displayName: 'Include Empty Cells',
|
displayName: 'Include Empty Cells',
|
||||||
name: 'includeEmptyCells',
|
name: 'includeEmptyCells',
|
||||||
|
|
|
@ -1,24 +1,28 @@
|
||||||
|
import path from 'path';
|
||||||
|
import type { IWorkflowBase } from 'n8n-workflow';
|
||||||
import * as Helpers from '@test/nodes/Helpers';
|
import * as Helpers from '@test/nodes/Helpers';
|
||||||
import type { WorkflowTestData } from '@test/nodes/types';
|
import type { WorkflowTestData } from '@test/nodes/types';
|
||||||
|
|
||||||
import { executeWorkflow } from '@test/nodes/ExecuteWorkflow';
|
import { executeWorkflow } from '@test/nodes/ExecuteWorkflow';
|
||||||
import path from 'path';
|
|
||||||
|
|
||||||
describe('Execute Spreadsheet File Node', () => {
|
describe('Execute Spreadsheet File Node', () => {
|
||||||
beforeEach(async () => {
|
beforeEach(async () => {
|
||||||
await Helpers.initBinaryDataService();
|
await Helpers.initBinaryDataService();
|
||||||
});
|
});
|
||||||
|
|
||||||
// replace workflow json 'Read Binary File' node's filePath to local file
|
const loadWorkflow = (fileName: string, csvName: string) => {
|
||||||
const workflow = Helpers.readJsonFileSync('nodes/SpreadsheetFile/test/workflow.json');
|
const workflow = Helpers.readJsonFileSync<IWorkflowBase>(
|
||||||
const node = workflow.nodes.find((n: any) => n.name === 'Read Binary File');
|
`nodes/SpreadsheetFile/test/${fileName}`,
|
||||||
node.parameters.filePath = path.join(__dirname, 'spreadsheet.csv');
|
);
|
||||||
|
const node = workflow.nodes.find((n) => n.name === 'Read Binary File');
|
||||||
|
node!.parameters.fileSelector = path.join(__dirname, csvName);
|
||||||
|
return workflow;
|
||||||
|
};
|
||||||
|
|
||||||
const tests: WorkflowTestData[] = [
|
const tests: WorkflowTestData[] = [
|
||||||
{
|
{
|
||||||
description: 'execute workflow.json',
|
description: 'execute workflow.json',
|
||||||
input: {
|
input: {
|
||||||
workflowData: workflow,
|
workflowData: loadWorkflow('workflow.json', 'spreadsheet.csv'),
|
||||||
},
|
},
|
||||||
output: {
|
output: {
|
||||||
nodeData: {
|
nodeData: {
|
||||||
|
@ -78,6 +82,7 @@ describe('Execute Spreadsheet File Node', () => {
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
],
|
],
|
||||||
|
'Read CSV with Row Limit': [[{ json: { A: '1', B: '2', C: '3' } }]],
|
||||||
'Write To File CSV': [
|
'Write To File CSV': [
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
|
@ -149,6 +154,18 @@ describe('Execute Spreadsheet File Node', () => {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
description: 'execute workflow.bom.json',
|
||||||
|
input: {
|
||||||
|
workflowData: loadWorkflow('workflow.bom.json', 'bom.csv'),
|
||||||
|
},
|
||||||
|
output: {
|
||||||
|
nodeData: {
|
||||||
|
'Edit with BOM included': [[{ json: { X: null } }]],
|
||||||
|
'Edit with BOM excluded': [[{ json: { X: '1' } }]],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
const nodeTypes = Helpers.setup(tests);
|
const nodeTypes = Helpers.setup(tests);
|
||||||
|
|
2
packages/nodes-base/nodes/SpreadsheetFile/test/bom.csv
Normal file
2
packages/nodes-base/nodes/SpreadsheetFile/test/bom.csv
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
a,b,c
|
||||||
|
1,2,3
|
|
155
packages/nodes-base/nodes/SpreadsheetFile/test/workflow.bom.json
Normal file
155
packages/nodes-base/nodes/SpreadsheetFile/test/workflow.bom.json
Normal file
|
@ -0,0 +1,155 @@
|
||||||
|
{
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"parameters": {},
|
||||||
|
"id": "40bf604f-19f9-43e7-8bbb-74c36925f154",
|
||||||
|
"name": "When clicking \"Execute Workflow\"",
|
||||||
|
"type": "n8n-nodes-base.manualTrigger",
|
||||||
|
"typeVersion": 1,
|
||||||
|
"position": [
|
||||||
|
-320,
|
||||||
|
1040
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"fileSelector": "bom.csv"
|
||||||
|
},
|
||||||
|
"id": "623ea890-8882-4273-973e-834652d823b5",
|
||||||
|
"name": "Read Binary File",
|
||||||
|
"type": "n8n-nodes-base.readBinaryFiles",
|
||||||
|
"typeVersion": 1,
|
||||||
|
"position": [
|
||||||
|
-100,
|
||||||
|
1040
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"fileFormat": "csv",
|
||||||
|
"options": {
|
||||||
|
"enableBOM": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "c8cca5fb-e119-4ca1-a597-4f051a7f64ea",
|
||||||
|
"name": "Exclude BOM",
|
||||||
|
"type": "n8n-nodes-base.spreadsheetFile",
|
||||||
|
"typeVersion": 2,
|
||||||
|
"position": [
|
||||||
|
120,
|
||||||
|
960
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"fileFormat": "csv",
|
||||||
|
"options": {
|
||||||
|
"enableBOM": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "56ec11dc-966b-4d06-b8c0-61475b30333d",
|
||||||
|
"name": "Include BOM",
|
||||||
|
"type": "n8n-nodes-base.spreadsheetFile",
|
||||||
|
"typeVersion": 2,
|
||||||
|
"position": [
|
||||||
|
120,
|
||||||
|
1180
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"fields": {
|
||||||
|
"values": [
|
||||||
|
{
|
||||||
|
"name": "X",
|
||||||
|
"stringValue": "={{ $json.a }}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"include": "none",
|
||||||
|
"options": {}
|
||||||
|
},
|
||||||
|
"id": "6f6bccf2-d674-4774-9df9-6f6fd893bace",
|
||||||
|
"name": "Edit with BOM excluded",
|
||||||
|
"type": "n8n-nodes-base.set",
|
||||||
|
"typeVersion": 3.2,
|
||||||
|
"position": [
|
||||||
|
320,
|
||||||
|
960
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"fields": {
|
||||||
|
"values": [
|
||||||
|
{
|
||||||
|
"name": "X",
|
||||||
|
"stringValue": "={{ $json.a }}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"include": "none",
|
||||||
|
"options": {}
|
||||||
|
},
|
||||||
|
"id": "27ca5cde-19cb-4bf2-9ab4-7f7e77ad01bd",
|
||||||
|
"name": "Edit with BOM included",
|
||||||
|
"type": "n8n-nodes-base.set",
|
||||||
|
"typeVersion": 3.2,
|
||||||
|
"position": [
|
||||||
|
320,
|
||||||
|
1180
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"connections": {
|
||||||
|
"When clicking \"Execute Workflow\"": {
|
||||||
|
"main": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"node": "Read Binary File",
|
||||||
|
"type": "main",
|
||||||
|
"index": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Exclude BOM": {
|
||||||
|
"main": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"node": "Edit with BOM excluded",
|
||||||
|
"type": "main",
|
||||||
|
"index": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Include BOM": {
|
||||||
|
"main": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"node": "Edit with BOM included",
|
||||||
|
"type": "main",
|
||||||
|
"index": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"Read Binary File": {
|
||||||
|
"main": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"node": "Exclude BOM",
|
||||||
|
"type": "main",
|
||||||
|
"index": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"node": "Include BOM",
|
||||||
|
"type": "main",
|
||||||
|
"index": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,7 +1,4 @@
|
||||||
{
|
{
|
||||||
"meta": {
|
|
||||||
"instanceId": "104a4d08d8897b8bdeb38aaca515021075e0bd8544c983c2bb8c86e6a8e6081c"
|
|
||||||
},
|
|
||||||
"nodes": [
|
"nodes": [
|
||||||
{
|
{
|
||||||
"parameters": {},
|
"parameters": {},
|
||||||
|
@ -29,11 +26,11 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"filePath": "C:\\Users\\spech\\Documents\\GitHub\\n8n-master\\packages\\nodes-base\\nodes\\SpreadsheetFile\\test\\spreadsheet.csv"
|
"fileSelector": "spreadsheet.csv"
|
||||||
},
|
},
|
||||||
"id": "d7620053-eb3d-43dd-b2cd-d60d9a08a9cc",
|
"id": "d7620053-eb3d-43dd-b2cd-d60d9a08a9cc",
|
||||||
"name": "Read Binary File",
|
"name": "Read Binary File",
|
||||||
"type": "n8n-nodes-base.readBinaryFile",
|
"type": "n8n-nodes-base.readBinaryFiles",
|
||||||
"typeVersion": 1,
|
"typeVersion": 1,
|
||||||
"position": [
|
"position": [
|
||||||
840,
|
840,
|
||||||
|
@ -173,6 +170,22 @@
|
||||||
1060,
|
1060,
|
||||||
940
|
940
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"parameters": {
|
||||||
|
"fileFormat": "csv",
|
||||||
|
"options": {
|
||||||
|
"maxRowCount": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"id": "de905389-a11b-4dd8-8416-14d650804445",
|
||||||
|
"name": "Read CSV with Row Limit",
|
||||||
|
"type": "n8n-nodes-base.spreadsheetFile",
|
||||||
|
"typeVersion": 2,
|
||||||
|
"position": [
|
||||||
|
-60,
|
||||||
|
1340
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"connections": {
|
"connections": {
|
||||||
|
@ -245,6 +258,11 @@
|
||||||
"node": "Read From File Read as String",
|
"node": "Read From File Read as String",
|
||||||
"type": "main",
|
"type": "main",
|
||||||
"index": 0
|
"index": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"node": "Read CSV with Row Limit",
|
||||||
|
"type": "main",
|
||||||
|
"index": 0
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
/* eslint-disable n8n-nodes-base/node-filename-against-convention */
|
/* eslint-disable n8n-nodes-base/node-filename-against-convention */
|
||||||
import { pipeline } from 'stream/promises';
|
|
||||||
import type {
|
import type {
|
||||||
IDataObject,
|
IDataObject,
|
||||||
IExecuteFunctions,
|
IExecuteFunctions,
|
||||||
|
@ -85,7 +84,12 @@ export class SpreadsheetFileV2 implements INodeType {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fileFormat === 'csv') {
|
if (fileFormat === 'csv') {
|
||||||
|
const maxRowCount = options.maxRowCount as number;
|
||||||
const parser = createCSVParser({
|
const parser = createCSVParser({
|
||||||
|
delimiter: options.delimiter as string,
|
||||||
|
fromLine: options.fromLine as number,
|
||||||
|
bom: options.enableBOM as boolean,
|
||||||
|
to: maxRowCount > -1 ? maxRowCount : undefined,
|
||||||
columns: options.headerRow !== false,
|
columns: options.headerRow !== false,
|
||||||
onRecord: (record) => {
|
onRecord: (record) => {
|
||||||
rows.push(record);
|
rows.push(record);
|
||||||
|
@ -93,9 +97,18 @@ export class SpreadsheetFileV2 implements INodeType {
|
||||||
});
|
});
|
||||||
if (binaryData.id) {
|
if (binaryData.id) {
|
||||||
const stream = await this.helpers.getBinaryStream(binaryData.id);
|
const stream = await this.helpers.getBinaryStream(binaryData.id);
|
||||||
await pipeline(stream, parser);
|
await new Promise<void>(async (resolve, reject) => {
|
||||||
|
parser.on('error', reject);
|
||||||
|
parser.on('readable', () => {
|
||||||
|
stream.unpipe(parser);
|
||||||
|
stream.destroy();
|
||||||
|
resolve();
|
||||||
|
});
|
||||||
|
stream.pipe(parser);
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
parser.write(binaryData.data, BINARY_ENCODING);
|
parser.write(binaryData.data, BINARY_ENCODING);
|
||||||
|
parser.end();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let workbook: WorkBook;
|
let workbook: WorkBook;
|
||||||
|
|
Loading…
Reference in a new issue