mirror of
https://github.com/n8n-io/n8n.git
synced 2024-11-09 22:24:05 -08:00
feat(Spreadsheet File Node): Improve CSV parsing (#7448)
This adds support for: 1. custom delimiters; 2. reading offsets, to avoid having to read a large CSV all at once; 3. excluding the byte-order mark (BOM). NODE-861 #7443
This commit is contained in:
parent
d8531a53b9
commit
79f23fb939
|
@ -201,6 +201,59 @@ export const optionsProperties: INodeProperties[] = [
|
|||
default: true,
|
||||
description: 'Whether the first row of the file contains the header names',
|
||||
},
|
||||
{
|
||||
displayName: 'Delimiter',
|
||||
name: 'delimiter',
|
||||
type: 'string',
|
||||
displayOptions: {
|
||||
show: {
|
||||
'/operation': ['fromFile'],
|
||||
'/fileFormat': ['csv'],
|
||||
},
|
||||
},
|
||||
default: ',',
|
||||
description: 'Set the field delimiter',
|
||||
},
|
||||
{
|
||||
displayName: 'Starting Line',
|
||||
name: 'fromLine',
|
||||
type: 'number',
|
||||
displayOptions: {
|
||||
show: {
|
||||
'/operation': ['fromFile'],
|
||||
'/fileFormat': ['csv'],
|
||||
},
|
||||
},
|
||||
default: 0,
|
||||
description: 'Start handling records from the requested line number',
|
||||
},
|
||||
{
|
||||
displayName: 'Max Number of Rows to Load',
|
||||
name: 'maxRowCount',
|
||||
type: 'number',
|
||||
displayOptions: {
|
||||
show: {
|
||||
'/operation': ['fromFile'],
|
||||
'/fileFormat': ['csv'],
|
||||
},
|
||||
},
|
||||
default: -1,
|
||||
description: 'Stop handling records after the requested number of rows are read',
|
||||
},
|
||||
{
|
||||
displayName: 'Exclude Byte Order Mark (BOM)',
|
||||
name: 'enableBOM',
|
||||
type: 'boolean',
|
||||
displayOptions: {
|
||||
show: {
|
||||
'/operation': ['fromFile'],
|
||||
'/fileFormat': ['csv'],
|
||||
},
|
||||
},
|
||||
default: false,
|
||||
description:
|
||||
'Whether to detect and exclude the byte-order-mark from the CSV Input if present',
|
||||
},
|
||||
{
|
||||
displayName: 'Include Empty Cells',
|
||||
name: 'includeEmptyCells',
|
||||
|
|
|
@ -1,24 +1,28 @@
|
|||
import path from 'path';
|
||||
import type { IWorkflowBase } from 'n8n-workflow';
|
||||
import * as Helpers from '@test/nodes/Helpers';
|
||||
import type { WorkflowTestData } from '@test/nodes/types';
|
||||
|
||||
import { executeWorkflow } from '@test/nodes/ExecuteWorkflow';
|
||||
import path from 'path';
|
||||
|
||||
describe('Execute Spreadsheet File Node', () => {
|
||||
beforeEach(async () => {
|
||||
await Helpers.initBinaryDataService();
|
||||
});
|
||||
|
||||
// replace workflow json 'Read Binary File' node's filePath to local file
|
||||
const workflow = Helpers.readJsonFileSync('nodes/SpreadsheetFile/test/workflow.json');
|
||||
const node = workflow.nodes.find((n: any) => n.name === 'Read Binary File');
|
||||
node.parameters.filePath = path.join(__dirname, 'spreadsheet.csv');
|
||||
const loadWorkflow = (fileName: string, csvName: string) => {
|
||||
const workflow = Helpers.readJsonFileSync<IWorkflowBase>(
|
||||
`nodes/SpreadsheetFile/test/${fileName}`,
|
||||
);
|
||||
const node = workflow.nodes.find((n) => n.name === 'Read Binary File');
|
||||
node!.parameters.fileSelector = path.join(__dirname, csvName);
|
||||
return workflow;
|
||||
};
|
||||
|
||||
const tests: WorkflowTestData[] = [
|
||||
{
|
||||
description: 'execute workflow.json',
|
||||
input: {
|
||||
workflowData: workflow,
|
||||
workflowData: loadWorkflow('workflow.json', 'spreadsheet.csv'),
|
||||
},
|
||||
output: {
|
||||
nodeData: {
|
||||
|
@ -78,6 +82,7 @@ describe('Execute Spreadsheet File Node', () => {
|
|||
},
|
||||
],
|
||||
],
|
||||
'Read CSV with Row Limit': [[{ json: { A: '1', B: '2', C: '3' } }]],
|
||||
'Write To File CSV': [
|
||||
[
|
||||
{
|
||||
|
@ -149,6 +154,18 @@ describe('Execute Spreadsheet File Node', () => {
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: 'execute workflow.bom.json',
|
||||
input: {
|
||||
workflowData: loadWorkflow('workflow.bom.json', 'bom.csv'),
|
||||
},
|
||||
output: {
|
||||
nodeData: {
|
||||
'Edit with BOM included': [[{ json: { X: null } }]],
|
||||
'Edit with BOM excluded': [[{ json: { X: '1' } }]],
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const nodeTypes = Helpers.setup(tests);
|
||||
|
|
2
packages/nodes-base/nodes/SpreadsheetFile/test/bom.csv
Normal file
2
packages/nodes-base/nodes/SpreadsheetFile/test/bom.csv
Normal file
|
@ -0,0 +1,2 @@
|
|||
a,b,c
|
||||
1,2,3
|
|
155
packages/nodes-base/nodes/SpreadsheetFile/test/workflow.bom.json
Normal file
155
packages/nodes-base/nodes/SpreadsheetFile/test/workflow.bom.json
Normal file
|
@ -0,0 +1,155 @@
|
|||
{
|
||||
"nodes": [
|
||||
{
|
||||
"parameters": {},
|
||||
"id": "40bf604f-19f9-43e7-8bbb-74c36925f154",
|
||||
"name": "When clicking \"Execute Workflow\"",
|
||||
"type": "n8n-nodes-base.manualTrigger",
|
||||
"typeVersion": 1,
|
||||
"position": [
|
||||
-320,
|
||||
1040
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"fileSelector": "bom.csv"
|
||||
},
|
||||
"id": "623ea890-8882-4273-973e-834652d823b5",
|
||||
"name": "Read Binary File",
|
||||
"type": "n8n-nodes-base.readBinaryFiles",
|
||||
"typeVersion": 1,
|
||||
"position": [
|
||||
-100,
|
||||
1040
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"fileFormat": "csv",
|
||||
"options": {
|
||||
"enableBOM": true
|
||||
}
|
||||
},
|
||||
"id": "c8cca5fb-e119-4ca1-a597-4f051a7f64ea",
|
||||
"name": "Exclude BOM",
|
||||
"type": "n8n-nodes-base.spreadsheetFile",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
120,
|
||||
960
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"fileFormat": "csv",
|
||||
"options": {
|
||||
"enableBOM": false
|
||||
}
|
||||
},
|
||||
"id": "56ec11dc-966b-4d06-b8c0-61475b30333d",
|
||||
"name": "Include BOM",
|
||||
"type": "n8n-nodes-base.spreadsheetFile",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
120,
|
||||
1180
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"fields": {
|
||||
"values": [
|
||||
{
|
||||
"name": "X",
|
||||
"stringValue": "={{ $json.a }}"
|
||||
}
|
||||
]
|
||||
},
|
||||
"include": "none",
|
||||
"options": {}
|
||||
},
|
||||
"id": "6f6bccf2-d674-4774-9df9-6f6fd893bace",
|
||||
"name": "Edit with BOM excluded",
|
||||
"type": "n8n-nodes-base.set",
|
||||
"typeVersion": 3.2,
|
||||
"position": [
|
||||
320,
|
||||
960
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"fields": {
|
||||
"values": [
|
||||
{
|
||||
"name": "X",
|
||||
"stringValue": "={{ $json.a }}"
|
||||
}
|
||||
]
|
||||
},
|
||||
"include": "none",
|
||||
"options": {}
|
||||
},
|
||||
"id": "27ca5cde-19cb-4bf2-9ab4-7f7e77ad01bd",
|
||||
"name": "Edit with BOM included",
|
||||
"type": "n8n-nodes-base.set",
|
||||
"typeVersion": 3.2,
|
||||
"position": [
|
||||
320,
|
||||
1180
|
||||
]
|
||||
}
|
||||
],
|
||||
"connections": {
|
||||
"When clicking \"Execute Workflow\"": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Read Binary File",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Exclude BOM": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Edit with BOM excluded",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Include BOM": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Edit with BOM included",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
},
|
||||
"Read Binary File": {
|
||||
"main": [
|
||||
[
|
||||
{
|
||||
"node": "Exclude BOM",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"node": "Include BOM",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,7 +1,4 @@
|
|||
{
|
||||
"meta": {
|
||||
"instanceId": "104a4d08d8897b8bdeb38aaca515021075e0bd8544c983c2bb8c86e6a8e6081c"
|
||||
},
|
||||
"nodes": [
|
||||
{
|
||||
"parameters": {},
|
||||
|
@ -29,11 +26,11 @@
|
|||
},
|
||||
{
|
||||
"parameters": {
|
||||
"filePath": "C:\\Users\\spech\\Documents\\GitHub\\n8n-master\\packages\\nodes-base\\nodes\\SpreadsheetFile\\test\\spreadsheet.csv"
|
||||
"fileSelector": "spreadsheet.csv"
|
||||
},
|
||||
"id": "d7620053-eb3d-43dd-b2cd-d60d9a08a9cc",
|
||||
"name": "Read Binary File",
|
||||
"type": "n8n-nodes-base.readBinaryFile",
|
||||
"type": "n8n-nodes-base.readBinaryFiles",
|
||||
"typeVersion": 1,
|
||||
"position": [
|
||||
840,
|
||||
|
@ -173,6 +170,22 @@
|
|||
1060,
|
||||
940
|
||||
]
|
||||
},
|
||||
{
|
||||
"parameters": {
|
||||
"fileFormat": "csv",
|
||||
"options": {
|
||||
"maxRowCount": 1
|
||||
}
|
||||
},
|
||||
"id": "de905389-a11b-4dd8-8416-14d650804445",
|
||||
"name": "Read CSV with Row Limit",
|
||||
"type": "n8n-nodes-base.spreadsheetFile",
|
||||
"typeVersion": 2,
|
||||
"position": [
|
||||
-60,
|
||||
1340
|
||||
]
|
||||
}
|
||||
],
|
||||
"connections": {
|
||||
|
@ -245,9 +258,14 @@
|
|||
"node": "Read From File Read as String",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"node": "Read CSV with Row Limit",
|
||||
"type": "main",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
/* eslint-disable n8n-nodes-base/node-filename-against-convention */
|
||||
import { pipeline } from 'stream/promises';
|
||||
import type {
|
||||
IDataObject,
|
||||
IExecuteFunctions,
|
||||
|
@ -85,7 +84,12 @@ export class SpreadsheetFileV2 implements INodeType {
|
|||
}
|
||||
|
||||
if (fileFormat === 'csv') {
|
||||
const maxRowCount = options.maxRowCount as number;
|
||||
const parser = createCSVParser({
|
||||
delimiter: options.delimiter as string,
|
||||
fromLine: options.fromLine as number,
|
||||
bom: options.enableBOM as boolean,
|
||||
to: maxRowCount > -1 ? maxRowCount : undefined,
|
||||
columns: options.headerRow !== false,
|
||||
onRecord: (record) => {
|
||||
rows.push(record);
|
||||
|
@ -93,9 +97,18 @@ export class SpreadsheetFileV2 implements INodeType {
|
|||
});
|
||||
if (binaryData.id) {
|
||||
const stream = await this.helpers.getBinaryStream(binaryData.id);
|
||||
await pipeline(stream, parser);
|
||||
await new Promise<void>(async (resolve, reject) => {
|
||||
parser.on('error', reject);
|
||||
parser.on('readable', () => {
|
||||
stream.unpipe(parser);
|
||||
stream.destroy();
|
||||
resolve();
|
||||
});
|
||||
stream.pipe(parser);
|
||||
});
|
||||
} else {
|
||||
parser.write(binaryData.data, BINARY_ENCODING);
|
||||
parser.end();
|
||||
}
|
||||
} else {
|
||||
let workbook: WorkBook;
|
||||
|
|
Loading…
Reference in a new issue