n8n/packages/cli/src/deduplication/deduplication-helper.ts

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

357 lines
11 KiB
TypeScript
Raw Normal View History

import { createHash } from 'crypto';
import {
type ICheckProcessedContextData,
type IDataDeduplicator,
type ICheckProcessedOptions,
type IDeduplicationOutput,
type DeduplicationScope,
type DeduplicationItemTypes,
type DeduplicationMode,
tryToParseDateTime,
} from 'n8n-workflow';
import * as assert from 'node:assert/strict';
import { Container } from 'typedi';
import type { ProcessedData } from '@/databases/entities/processed-data';
import { ProcessedDataRepository } from '@/databases/repositories/processed-data.repository';
import { DeduplicationError } from '@/errors/deduplication.error';
import type { IProcessedDataEntries, IProcessedDataLatest } from '@/interfaces';
/**
 * Records which items a workflow has already processed and reports, for each new batch,
 * which items are new and which were seen before.
 *
 * State is read and written through `ProcessedDataRepository`, one row per workflow ID and
 * context string. Two storage shapes are used:
 * - a list of MD5 hashes (one per item) for every mode that is not a "latest" mode;
 * - a single "latest" value for the modes 'latestIncrementalKey' and 'latestDate'.
 */
export class DeduplicationHelper implements IDataDeduplicator {
	/** Returns whether `mode` stores a single "latest" value instead of a list of hashes. */
	private static isLatestMode(mode: DeduplicationMode): boolean {
		return mode === 'latestIncrementalKey' || mode === 'latestDate';
	}

	/** Maps two numbers to a sort-comparator result: 1 if `left` is larger, -1 if smaller, 0 if equal. */
	private static toOrdering(left: number, right: number): 1 | 0 | -1 {
		if (left === right) {
			return 0;
		}
		return left > right ? 1 : -1;
	}

	/**
	 * Drops the oldest (leading) hashes in place so that at most `maxEntries` remain.
	 * Does nothing when `maxEntries` is unset or 0.
	 */
	private static trimToMaxEntries(hashes: unknown[], maxEntries: number | undefined): void {
		if (maxEntries && hashes.length > maxEntries) {
			hashes.splice(0, hashes.length - maxEntries);
		}
	}

	/**
	 * Returns a sorted copy of `items` (ascending according to `compareValues`); the input
	 * array is not modified.
	 *
	 * @throws DeduplicationError if the comparator rejects a value or the mode.
	 */
	private static sortEntries(
		items: DeduplicationItemTypes[],
		mode: DeduplicationMode,
	): DeduplicationItemTypes[] {
		return [...items].sort((a, b) => DeduplicationHelper.compareValues(mode, a, b));
	}

	/**
	 * Compares two values according to the given "latest" mode.
	 *
	 * @param mode - 'latestIncrementalKey' compares the values as numbers (via `Number()`);
	 * 'latestDate' compares them as points in time (via `tryToParseDateTime`).
	 * @param value1 - The first value to compare.
	 * @param value2 - The second value to compare.
	 *
	 * @returns `1` if `value1` is greater/later than `value2`, `-1` if it is smaller/earlier,
	 * and `0` if both are equal.
	 *
	 * @throws DeduplicationError if:
	 * - the mode is 'latestIncrementalKey' and a value does not convert to a number,
	 * - the mode is 'latestDate' and a value cannot be parsed as a date,
	 * - an unsupported mode is provided.
	 */
	private static compareValues(
		mode: DeduplicationMode,
		value1: DeduplicationItemTypes,
		value2: DeduplicationItemTypes,
	): 1 | 0 | -1 {
		if (mode === 'latestIncrementalKey') {
			const num1 = Number(value1);
			const num2 = Number(value2);
			if (isNaN(num1) || isNaN(num2)) {
				throw new DeduplicationError(
					'Invalid value. Only numbers are supported in mode "latestIncrementalKey"',
				);
			}
			return DeduplicationHelper.toOrdering(num1, num2);
		}

		if (mode === 'latestDate') {
			try {
				// Compare the numeric timestamps: `===` on the parsed date objects themselves
				// would compare identity, so two equal dates would never be reported as equal.
				const time1 = tryToParseDateTime(value1).valueOf();
				const time2 = tryToParseDateTime(value2).valueOf();
				return DeduplicationHelper.toOrdering(time1, time2);
			} catch (error) {
				throw new DeduplicationError(
					'Invalid value. Only valid dates are supported in mode "latestDate"',
				);
			}
		}

		throw new DeduplicationError(
			"Invalid mode. Only 'latestIncrementalKey' and 'latestDate' are supported.",
		);
	}

	/**
	 * Builds the context string under which processed data is stored.
	 *
	 * @returns `n:<node id>` for scope 'node', an empty string for any other scope.
	 * @throws DeduplicationError if scope is 'node' but `contextData.node` is missing.
	 */
	private static createContext(
		scope: DeduplicationScope,
		contextData: ICheckProcessedContextData,
	): string {
		if (scope !== 'node') {
			return '';
		}

		if (!contextData.node) {
			throw new DeduplicationError(
				"No node information has been provided and so cannot use scope 'node'",
			);
		}

		// Use the node ID to make sure that the data can still be accessed and does not get deleted
		// whenever the node gets renamed
		return `n:${contextData.node.id}`;
	}

	/** Returns the base64-encoded MD5 digest of the value's string form; stored hashes are compared for equality. */
	private static createValueHash(value: DeduplicationItemTypes): string {
		return createHash('md5').update(value.toString()).digest('base64');
	}

	/** Loads the stored row for the workflow and scope context, or `null` if none exists. */
	private async findProcessedData(
		scope: DeduplicationScope,
		contextData: ICheckProcessedContextData,
	): Promise<ProcessedData | null> {
		return await Container.get(ProcessedDataRepository).findOne({
			where: {
				workflowId: contextData.workflow.id,
				context: DeduplicationHelper.createContext(scope, contextData),
			},
		});
	}

	/**
	 * Ensures previously stored data was written with the same mode as the current request.
	 *
	 * @throws DeduplicationError if stored data exists and its mode differs from `options.mode`.
	 */
	private validateMode(processedData: ProcessedData | null, options: ICheckProcessedOptions) {
		if (processedData && processedData.value.mode !== options.mode) {
			throw new DeduplicationError(
				'Deduplication data was originally saved with an incompatible setting of the Keep Items Where parameter. Try Clean Database operation to reset.',
			);
		}
	}

	/** Type guard: the stored value holds a list of hashes (`data` is an array). */
	private processedDataHasEntries(
		data: IProcessedDataEntries | IProcessedDataLatest,
	): data is IProcessedDataEntries {
		return Array.isArray(data.data);
	}

	/** Type guard: the stored value holds a single "latest" value (`data` is not an array). */
	private processedDataIsLatest(
		data: IProcessedDataEntries | IProcessedDataLatest,
	): data is IProcessedDataLatest {
		return data && !Array.isArray(data.data);
	}

	/**
	 * Handles the modes 'latestIncrementalKey' and 'latestDate'.
	 *
	 * Without stored data every item is reported as new and the greatest incoming value is
	 * inserted as the new "latest" value. With stored data, items greater than the stored
	 * value are reported as new (in ascending order), all others as processed, and the
	 * stored value is raised to the greatest new item.
	 *
	 * @throws DeduplicationError if a value is not valid for the mode.
	 */
	private async handleLatestModes(
		items: DeduplicationItemTypes[],
		contextData: ICheckProcessedContextData,
		options: ICheckProcessedOptions,
		processedData: ProcessedData | null,
		dbContext: string,
	): Promise<IDeduplicationOutput> {
		const incomingItems = DeduplicationHelper.sortEntries(items, options.mode);

		if (!processedData) {
			// All items are new. Only persist when there is a value to store: writing
			// `undefined` as the "latest" value would make every later comparison fail.
			const latestIncoming = incomingItems.pop();
			if (latestIncoming !== undefined) {
				// Sorting a single item never invokes the comparator, so validate the value
				// explicitly before it is stored.
				DeduplicationHelper.compareValues(options.mode, latestIncoming, latestIncoming);

				await Container.get(ProcessedDataRepository).insert({
					workflowId: contextData.workflow.id,
					context: dbContext,
					value: {
						mode: options.mode,
						data: latestIncoming,
					},
				});
			}

			return {
				new: items,
				processed: [],
			};
		}

		const returnData: IDeduplicationOutput = {
			new: [],
			processed: [],
		};

		// NOTE: if the stored value has the hashed-entries shape, the items end up in neither list.
		if (!this.processedDataIsLatest(processedData.value)) {
			return returnData;
		}

		const storedLatest = processedData.value;
		let largestValue = storedLatest.data;

		for (const item of incomingItems) {
			if (DeduplicationHelper.compareValues(options.mode, item, storedLatest.data) !== 1) {
				returnData.processed.push(item);
				continue;
			}

			returnData.new.push(item);
			if (DeduplicationHelper.compareValues(options.mode, item, largestValue) === 1) {
				largestValue = item;
			}
		}

		storedLatest.data = largestValue;

		await Container.get(ProcessedDataRepository).update(
			{ workflowId: processedData.workflowId, context: processedData.context },
			processedData,
		);

		return returnData;
	}

	/**
	 * Handles the hash-based mode: every item is hashed and compared against the stored hashes.
	 *
	 * Items whose hash is not stored yet are reported as new and their hash is recorded
	 * (once per distinct hash); all other items are reported as processed. When
	 * `options.maxEntries` is set, only the most recently added `maxEntries` hashes are kept.
	 */
	private async handleHashedItems(
		items: DeduplicationItemTypes[],
		contextData: ICheckProcessedContextData,
		options: ICheckProcessedOptions,
		processedData: ProcessedData | null,
		dbContext: string,
	): Promise<IDeduplicationOutput> {
		const hashedItems = items.map((item) => DeduplicationHelper.createValueHash(item));

		if (!processedData) {
			// All items are new so add new entries. Store each hash once so that duplicates
			// in the batch do not consume `maxEntries` slots.
			const uniqueHashes = Array.from(new Set(hashedItems));
			DeduplicationHelper.trimToMaxEntries(uniqueHashes, options.maxEntries);

			await Container.get(ProcessedDataRepository).insert({
				workflowId: contextData.workflow.id,
				context: dbContext,
				value: {
					mode: options.mode,
					data: uniqueHashes,
				},
			});

			return {
				new: items,
				processed: [],
			};
		}

		const returnData: IDeduplicationOutput = {
			new: [],
			processed: [],
		};

		// NOTE: if the stored value has the "latest" shape, the items end up in neither list.
		if (!this.processedDataHasEntries(processedData.value)) {
			return returnData;
		}

		const storedEntries = processedData.value;
		// Hashes stored before this call; decides whether an item counts as processed.
		const knownHashes = new Set<DeduplicationItemTypes>(storedEntries.data);
		// Hashes appended during this call; prevents storing the same hash twice.
		const addedHashes = new Set<string>();

		hashedItems.forEach((hash, index) => {
			if (knownHashes.has(hash)) {
				returnData.processed.push(items[index]);
				return;
			}

			returnData.new.push(items[index]);
			if (!addedHashes.has(hash)) {
				addedHashes.add(hash);
				storedEntries.data.push(hash);
			}
		});

		DeduplicationHelper.trimToMaxEntries(storedEntries.data, options.maxEntries);

		await Container.get(ProcessedDataRepository).update(
			{ workflowId: processedData.workflowId, context: processedData.context },
			processedData,
		);

		return returnData;
	}

	/**
	 * Splits `items` into new and already processed ones and records the new ones.
	 *
	 * @param items - The values to check.
	 * @param scope - Determines the storage context (see `createContext`).
	 * @param contextData - Workflow (and, for scope 'node', node) the data belongs to.
	 * @param options - Mode and, for the hash-based mode, the optional `maxEntries` limit.
	 *
	 * @throws DeduplicationError if the scope context cannot be built, the stored data was
	 * written with a different mode, or a value is invalid for the mode.
	 * @throws AssertionError if `contextData.workflow.id` is falsy.
	 */
	async checkProcessedAndRecord(
		items: DeduplicationItemTypes[],
		scope: DeduplicationScope,
		contextData: ICheckProcessedContextData,
		options: ICheckProcessedOptions,
	): Promise<IDeduplicationOutput> {
		const dbContext = DeduplicationHelper.createContext(scope, contextData);

		assert.ok(contextData.workflow.id);

		const processedData = await this.findProcessedData(scope, contextData);

		this.validateMode(processedData, options);

		if (DeduplicationHelper.isLatestMode(options.mode)) {
			return await this.handleLatestModes(items, contextData, options, processedData, dbContext);
		}

		// mode entries
		return await this.handleHashedItems(items, contextData, options, processedData, dbContext);
	}

	/**
	 * Removes the hashes of `items` from the stored entries so the items count as new again.
	 * Does nothing if no data is stored or the stored value is not a list of hashes.
	 *
	 * @throws DeduplicationError if `options.mode` is one of the "latest" modes.
	 * @throws AssertionError if `contextData.workflow.id` is falsy.
	 */
	async removeProcessed(
		items: DeduplicationItemTypes[],
		scope: DeduplicationScope,
		contextData: ICheckProcessedContextData,
		options: ICheckProcessedOptions,
	): Promise<void> {
		if (DeduplicationHelper.isLatestMode(options.mode)) {
			throw new DeduplicationError('Removing processed data is not possible in mode "latest"');
		}

		assert.ok(contextData.workflow.id);

		const processedData = await this.findProcessedData(scope, contextData);
		if (!processedData) {
			return;
		}

		const hashesToRemove = new Set<DeduplicationItemTypes>(
			items.map((item) => DeduplicationHelper.createValueHash(item)),
		);

		if (!this.processedDataHasEntries(processedData.value)) {
			return;
		}

		// Remove every occurrence of each hash: a hash that was stored more than once would
		// otherwise survive the removal and keep the item marked as processed.
		const storedEntries = processedData.value;
		storedEntries.data = storedEntries.data.filter((hash) => !hashesToRemove.has(hash));

		await Container.get(ProcessedDataRepository).update(
			{ workflowId: processedData.workflowId, context: processedData.context },
			processedData,
		);
	}

	/** Deletes the stored data for the workflow and scope context. */
	async clearAllProcessedItems(
		scope: DeduplicationScope,
		contextData: ICheckProcessedContextData,
	): Promise<void> {
		await Container.get(ProcessedDataRepository).delete({
			workflowId: contextData.workflow.id,
			context: DeduplicationHelper.createContext(scope, contextData),
		});
	}

	/**
	 * Returns the number of stored hashes.
	 *
	 * @returns The stored entry count when `options.mode` is 'entries' and a list of hashes
	 * is stored; `0` in every other case.
	 */
	async getProcessedDataCount(
		scope: DeduplicationScope,
		contextData: ICheckProcessedContextData,
		options: ICheckProcessedOptions,
	): Promise<number> {
		const processedData = await this.findProcessedData(scope, contextData);

		if (
			options.mode === 'entries' &&
			processedData &&
			this.processedDataHasEntries(processedData.value)
		) {
			return processedData.value.data.length;
		}

		return 0;
	}
}