🐛 Fix "unknown", never-end workflow and not displaying error message (#1978)

* Added try catch blocks to avoid endlessly running workflows

* Added handling for subworkflows

*  Fix one cause of "unkown" status of worklows with "main" mode

*  Fix one cause of "unkown" status of worklows with "own" mode

*  Fix one cause of "unkown" status of worklows with "queue" mode

* Saving database recovery

* 🐛 Fix issue that errors did not get saved correctly and also not
displayed

*  Save workflow timeout correctly as error

* Adding error capture to queued jobs

*  Mark canceled executions as not finished consistently across all
modes

Co-authored-by: Jan Oberhauser <jan.oberhauser@gmail.com>
This commit is contained in:
Omar Ajoue 2021-07-10 11:34:41 +02:00 committed by GitHub
parent abc2f2a515
commit d3da5023f0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 232 additions and 97 deletions

View file

@ -387,9 +387,9 @@ function hookFunctionsSave(parentProcessMode?: string): IWorkflowExecuteHooks {
}
// Leave log message before flatten as that operation increased memory usage a lot and the chance of a crash is highest here
Logger.debug(`Save execution data to database for execution ID ${this.executionId}`, {
executionId: this.executionId,
workflowId: this.workflowData.id,
Logger.debug(`Save execution data to database for execution ID ${this.executionId}`, {
executionId: this.executionId,
workflowId: this.workflowData.id,
finished: fullExecutionData.finished,
stoppedAt: fullExecutionData.stoppedAt,
});
@ -409,12 +409,12 @@ function hookFunctionsSave(parentProcessMode?: string): IWorkflowExecuteHooks {
executeErrorWorkflow(this.workflowData, fullRunData, this.mode, this.executionId, this.retryOf);
}
} catch (error) {
Logger.error(`Failed saving execution data to DB on execution ID ${this.executionId}`, {
executionId: this.executionId,
Logger.error(`Failed saving execution data to DB on execution ID ${this.executionId}`, {
executionId: this.executionId,
workflowId: this.workflowData.id,
error,
});
if (!isManualMode) {
executeErrorWorkflow(this.workflowData, fullRunData, this.mode, undefined, this.retryOf);
}
@ -608,44 +608,78 @@ export async function executeWorkflow(workflowInfo: IExecuteWorkflowInfo, additi
executionId = parentExecutionId !== undefined ? parentExecutionId : await ActiveExecutions.getInstance().add(runData);
}
const runExecutionData = runData.executionData as IRunExecutionData;
let data;
try {
// Get the needed credentials for the current workflow as they will differ to the ones of the
// calling workflow.
const credentials = await WorkflowCredentials(workflowData!.nodes);
// Get the needed credentials for the current workflow as they will differ to the ones of the
// calling workflow.
const credentials = await WorkflowCredentials(workflowData!.nodes);
// Create new additionalData to have different workflow loaded and to call
// different webooks
const additionalDataIntegrated = await getBase(credentials);
additionalDataIntegrated.hooks = getWorkflowHooksIntegrated(runData.executionMode, executionId, workflowData!, { parentProcessMode: additionalData.hooks!.mode });
// Make sure we pass on the original executeWorkflow function we received
// This one already contains changes to talk to parent process
// and get executionID from `activeExecutions` running on main process
additionalDataIntegrated.executeWorkflow = additionalData.executeWorkflow;
let subworkflowTimeout = additionalData.executionTimeoutTimestamp;
if (workflowData.settings?.executionTimeout !== undefined && workflowData.settings.executionTimeout > 0) {
// We might have received a max timeout timestamp from the parent workflow
// If we did, then we get the minimum time between the two timeouts
// If no timeout was given from the parent, then we use our timeout.
subworkflowTimeout = Math.min(additionalData.executionTimeoutTimestamp || Number.MAX_SAFE_INTEGER, Date.now() + (workflowData.settings.executionTimeout as number * 1000));
}
// Create new additionalData to have different workflow loaded and to call
// different webooks
const additionalDataIntegrated = await getBase(credentials);
additionalDataIntegrated.hooks = getWorkflowHooksIntegrated(runData.executionMode, executionId, workflowData!, { parentProcessMode: additionalData.hooks!.mode });
// Make sure we pass on the original executeWorkflow function we received
// This one already contains changes to talk to parent process
// and get executionID from `activeExecutions` running on main process
additionalDataIntegrated.executeWorkflow = additionalData.executeWorkflow;
additionalDataIntegrated.executionTimeoutTimestamp = subworkflowTimeout;
let subworkflowTimeout = additionalData.executionTimeoutTimestamp;
if (workflowData.settings?.executionTimeout !== undefined && workflowData.settings.executionTimeout > 0) {
// We might have received a max timeout timestamp from the parent workflow
// If we did, then we get the minimum time between the two timeouts
// If no timeout was given from the parent, then we use our timeout.
subworkflowTimeout = Math.min(additionalData.executionTimeoutTimestamp || Number.MAX_SAFE_INTEGER, Date.now() + (workflowData.settings.executionTimeout as number * 1000));
}
const runExecutionData = runData.executionData as IRunExecutionData;
additionalDataIntegrated.executionTimeoutTimestamp = subworkflowTimeout;
// Execute the workflow
const workflowExecute = new WorkflowExecute(additionalDataIntegrated, runData.executionMode, runExecutionData);
if (parentExecutionId !== undefined) {
// Must be changed to become typed
return {
// Execute the workflow
const workflowExecute = new WorkflowExecute(additionalDataIntegrated, runData.executionMode, runExecutionData);
if (parentExecutionId !== undefined) {
// Must be changed to become typed
return {
startedAt: new Date(),
workflow,
workflowExecute,
};
}
data = await workflowExecute.processRunExecutionData(workflow);
} catch (error) {
const fullRunData: IRun = {
data: {
resultData: {
error,
runData: {},
},
},
finished: false,
mode: 'integrated',
startedAt: new Date(),
workflow,
workflowExecute,
stoppedAt: new Date(),
};
// When failing, we might not have finished the execution
// Therefore, database might not contain finished errors.
// Force an update to db as there should be no harm doing this
const fullExecutionData: IExecutionDb = {
data: fullRunData.data,
mode: fullRunData.mode,
finished: fullRunData.finished ? fullRunData.finished : false,
startedAt: fullRunData.startedAt,
stoppedAt: fullRunData.stoppedAt,
workflowData,
};
const executionData = ResponseHelper.flattenExecutionData(fullExecutionData);
await Db.collections.Execution!.update(executionId, executionData as IExecutionFlattedDb);
throw {
...error,
stack: error!.stack,
};
}
const data = await workflowExecute.processRunExecutionData(workflow);
await externalHooks.run('workflow.postExecute', [data, workflowData]);

View file

@ -8,6 +8,7 @@ import {
IBullJobResponse,
ICredentialsOverwrite,
ICredentialsTypeData,
IExecutionDb,
IExecutionFlattedDb,
IExecutionResponse,
IProcessMessageDataHook,
@ -29,6 +30,7 @@ import {
import {
ExecutionError,
IRun,
IWorkflowBase,
LoggerProxy as Logger,
Workflow,
WorkflowExecuteMode,
@ -85,11 +87,15 @@ export class WorkflowRunner {
* @param {string} executionId
* @memberof WorkflowRunner
*/
processError(error: ExecutionError, startedAt: Date, executionMode: WorkflowExecuteMode, executionId: string) {
async processError(error: ExecutionError, startedAt: Date, executionMode: WorkflowExecuteMode, executionId: string, hooks?: WorkflowHooks) {
const fullRunData: IRun = {
data: {
resultData: {
error,
error: {
...error,
message: error.message,
stack: error.stack,
},
runData: {},
},
},
@ -102,6 +108,10 @@ export class WorkflowRunner {
// Remove from active execution with empty data. That will
// set the execution to failed.
this.activeExecutions.remove(executionId, fullRunData);
if (hooks) {
await hooks.executeHookFunctions('workflowExecuteAfter', [fullRunData]);
}
}
/**
@ -179,28 +189,34 @@ export class WorkflowRunner {
// Register the active execution
const executionId = await this.activeExecutions.add(data, undefined);
Logger.verbose(`Execution for workflow ${data.workflowData.name} was assigned id ${executionId}`, {executionId});
let workflowExecution: PCancelable<IRun>;
additionalData.hooks = WorkflowExecuteAdditionalData.getWorkflowHooksMain(data, executionId, true);
additionalData.sendMessageToUI = WorkflowExecuteAdditionalData.sendMessageToUI.bind({sessionId: data.sessionId});
try {
additionalData.sendMessageToUI = WorkflowExecuteAdditionalData.sendMessageToUI.bind({sessionId: data.sessionId});
let workflowExecution: PCancelable<IRun>;
if (data.executionData !== undefined) {
Logger.debug(`Execution ID ${executionId} had Execution data. Running with payload.`, {executionId});
const workflowExecute = new WorkflowExecute(additionalData, data.executionMode, data.executionData);
workflowExecution = workflowExecute.processRunExecutionData(workflow);
} else if (data.runData === undefined || data.startNodes === undefined || data.startNodes.length === 0 || data.destinationNode === undefined) {
Logger.debug(`Execution ID ${executionId} will run executing all nodes.`, {executionId});
// Execute all nodes
if (data.executionData !== undefined) {
Logger.debug(`Execution ID ${executionId} had Execution data. Running with payload.`, {executionId});
const workflowExecute = new WorkflowExecute(additionalData, data.executionMode, data.executionData);
workflowExecution = workflowExecute.processRunExecutionData(workflow);
} else if (data.runData === undefined || data.startNodes === undefined || data.startNodes.length === 0 || data.destinationNode === undefined) {
Logger.debug(`Execution ID ${executionId} will run executing all nodes.`, {executionId});
// Execute all nodes
// Can execute without webhook so go on
const workflowExecute = new WorkflowExecute(additionalData, data.executionMode);
workflowExecution = workflowExecute.run(workflow, undefined, data.destinationNode);
} else {
Logger.debug(`Execution ID ${executionId} is a partial execution.`, {executionId});
// Execute only the nodes between start and destination nodes
const workflowExecute = new WorkflowExecute(additionalData, data.executionMode);
workflowExecution = workflowExecute.runPartialWorkflow(workflow, data.runData, data.startNodes, data.destinationNode);
// Can execute without webhook so go on
const workflowExecute = new WorkflowExecute(additionalData, data.executionMode);
workflowExecution = workflowExecute.run(workflow, undefined, data.destinationNode);
} else {
Logger.debug(`Execution ID ${executionId} is a partial execution.`, {executionId});
// Execute only the nodes between start and destination nodes
const workflowExecute = new WorkflowExecute(additionalData, data.executionMode);
workflowExecution = workflowExecute.runPartialWorkflow(workflow, data.runData, data.startNodes, data.destinationNode);
}
} catch (error) {
await this.processError(error, new Date(), data.executionMode, executionId, additionalData.hooks);
throw error;
}
this.activeExecutions.attachWorkflowExecution(executionId, workflowExecution);
@ -247,7 +263,17 @@ export class WorkflowRunner {
removeOnComplete: true,
removeOnFail: true,
};
const job = await this.jobQueue.add(jobData, jobOptions);
let job: Bull.Job;
try {
job = await this.jobQueue.add(jobData, jobOptions);
} catch (error) {
// We use "getWorkflowHooksIntegrated" here as we are just integrated in the "workflowExecuteAfter"
// hook anyway and other get so ignored
const hooks = WorkflowExecuteAdditionalData.getWorkflowHooksIntegrated(data.executionMode, executionId, data.workflowData, { retryOf: data.retryOf ? data.retryOf.toString() : undefined });
await this.processError(error, new Date(), data.executionMode, executionId, hooks);
return executionId;
}
console.log('Started with ID: ' + job.id.toString());
const hooks = WorkflowExecuteAdditionalData.getWorkflowHooksWorkerMain(data.executionMode, executionId, data.workflowData, { retryOf: data.retryOf ? data.retryOf.toString() : undefined });
@ -264,7 +290,7 @@ export class WorkflowRunner {
const fullRunData :IRun = {
data: {
resultData: {
error: new WorkflowOperationError('Workflow has been canceled!'),
error: new WorkflowOperationError('Workflow-Execution has been canceled!'),
runData: {},
},
},
@ -280,6 +306,9 @@ export class WorkflowRunner {
const queueRecoveryInterval = config.get('queue.bull.queueRecoveryInterval') as number;
const racingPromises: Array<Promise<IBullJobResponse | object>> = [jobData];
let clearWatchdogInterval;
if (queueRecoveryInterval > 0) {
/*************************************************
* Long explanation about what this solves: *
@ -295,7 +324,7 @@ export class WorkflowRunner {
*************************************************/
let watchDogInterval: NodeJS.Timeout | undefined;
const watchDog = new Promise((res) => {
const watchDog: Promise<object> = new Promise((res) => {
watchDogInterval = setInterval(async () => {
const currentJob = await this.jobQueue.getJob(job.id);
// When null means job is finished (not found in queue)
@ -306,19 +335,43 @@ export class WorkflowRunner {
}, queueRecoveryInterval * 1000);
});
racingPromises.push(watchDog);
const clearWatchdogInterval = () => {
clearWatchdogInterval = () => {
if (watchDogInterval) {
clearInterval(watchDogInterval);
watchDogInterval = undefined;
}
};
}
await Promise.race([jobData, watchDog]);
clearWatchdogInterval();
try {
await Promise.race(racingPromises);
if (clearWatchdogInterval !== undefined) {
clearWatchdogInterval();
}
} catch (error) {
const hooks = WorkflowExecuteAdditionalData.getWorkflowHooksWorkerExecuter(data.executionMode, executionId, data.workflowData, { retryOf: data.retryOf ? data.retryOf.toString() : undefined });
Logger.error(`Problem with execution ${executionId}: ${error.message}. Aborting.`);
if (clearWatchdogInterval !== undefined) {
clearWatchdogInterval();
}
await this.processError(error, new Date(), data.executionMode, executionId, hooks);
} else {
await jobData;
const fullRunData :IRun = {
data: {
resultData: {
error,
runData: {},
},
},
mode: data.executionMode,
startedAt: new Date(),
stoppedAt: new Date(),
};
this.activeExecutions.remove(executionId, fullRunData);
resolve(fullRunData);
return;
}
@ -427,8 +480,13 @@ export class WorkflowRunner {
const workflowHooks = WorkflowExecuteAdditionalData.getWorkflowHooksMain(data, executionId);
// Send all data to subprocess it needs to run the workflow
subprocess.send({ type: 'startWorkflow', data } as IProcessMessage);
try {
// Send all data to subprocess it needs to run the workflow
subprocess.send({ type: 'startWorkflow', data } as IProcessMessage);
} catch (error) {
await this.processError(error, new Date(), data.executionMode, executionId, workflowHooks);
return executionId;
}
// Start timeout for the execution
let executionTimeout: NodeJS.Timeout;
@ -476,14 +534,14 @@ export class WorkflowRunner {
} else if (message.type === 'processError') {
clearTimeout(executionTimeout);
const executionError = message.data.executionError as ExecutionError;
this.processError(executionError, startedAt, data.executionMode, executionId);
await this.processError(executionError, startedAt, data.executionMode, executionId, workflowHooks);
} else if (message.type === 'processHook') {
this.processHookMessage(workflowHooks, message.data as IProcessMessageDataHook);
} else if (message.type === 'timeout') {
// Execution timed out and its process has been terminated
const timeoutError = new WorkflowOperationError('Workflow execution timed out!');
// No need to add hook here as the subprocess takes care of calling the hooks
this.processError(timeoutError, startedAt, data.executionMode, executionId);
} else if (message.type === 'startExecution') {
const executionId = await this.activeExecutions.add(message.data.runData);
@ -506,13 +564,13 @@ export class WorkflowRunner {
// Execution timed out and its process has been terminated
const timeoutError = new WorkflowOperationError('Workflow execution timed out!');
this.processError(timeoutError, startedAt, data.executionMode, executionId);
await this.processError(timeoutError, startedAt, data.executionMode, executionId, workflowHooks);
} else if (code !== 0) {
Logger.debug(`Subprocess for execution ID ${executionId} finished with error code ${code}.`, {executionId});
// Process did exit with error code, so something went wrong.
const executionError = new WorkflowOperationError('Workflow execution process did crash for an unknown reason!');
this.processError(executionError, startedAt, data.executionMode, executionId);
await this.processError(executionError, startedAt, data.executionMode, executionId, workflowHooks);
}
for(const executionId of childExecutionIds) {

View file

@ -30,6 +30,7 @@ import {
IWorkflowExecuteHooks,
LoggerProxy,
Workflow,
WorkflowExecuteMode,
WorkflowHooks,
WorkflowOperationError,
} from 'n8n-workflow';
@ -315,7 +316,7 @@ process.on('message', async (message: IProcessMessage) => {
for (const executionId of executionIds) {
const childWorkflowExecute = workflowRunner.childExecutions[executionId];
runData = childWorkflowExecute.workflowExecute.getFullRunData(workflowRunner.childExecutions[executionId].startedAt);
const timeOutError = message.type === 'timeout' ? new WorkflowOperationError('Workflow execution timed out!') : undefined;
const timeOutError = message.type === 'timeout' ? new WorkflowOperationError('Workflow execution timed out!') : new WorkflowOperationError('Workflow-Execution has been canceled!');
// If there is any data send it to parent process, if execution timedout add the error
await childWorkflowExecute.workflowExecute.processSuccessExecution(workflowRunner.childExecutions[executionId].startedAt, childWorkflowExecute.workflow, timeOutError);
@ -324,7 +325,7 @@ process.on('message', async (message: IProcessMessage) => {
// Workflow started already executing
runData = workflowRunner.workflowExecute.getFullRunData(workflowRunner.startedAt);
const timeOutError = message.type === 'timeout' ? new WorkflowOperationError('Workflow execution timed out!') : undefined;
const timeOutError = message.type === 'timeout' ? new WorkflowOperationError('Workflow execution timed out!') : new WorkflowOperationError('Workflow-Execution has been canceled!');
// If there is any data send it to parent process, if execution timedout add the error
await workflowRunner.workflowExecute.processSuccessExecution(workflowRunner.startedAt, workflowRunner.workflow!, timeOutError);
@ -336,8 +337,8 @@ process.on('message', async (message: IProcessMessage) => {
runData: {},
},
},
finished: message.type !== 'timeout',
mode: workflowRunner.data!.executionMode,
finished: false,
mode: workflowRunner.data ? workflowRunner.data!.executionMode : 'own' as WorkflowExecuteMode,
startedAt: workflowRunner.startedAt,
stoppedAt: new Date(),
};

View file

@ -807,7 +807,7 @@ export class WorkflowExecute {
})()
.then(async () => {
if (gotCancel && executionError === undefined) {
return this.processSuccessExecution(startedAt, workflow, new WorkflowOperationError('Workflow has been canceled!'));
return this.processSuccessExecution(startedAt, workflow, new WorkflowOperationError('Workflow has been canceled or timed out!'));
}
return this.processSuccessExecution(startedAt, workflow, executionError);
})
@ -844,7 +844,11 @@ export class WorkflowExecute {
if (executionError !== undefined) {
Logger.verbose(`Workflow execution finished with error`, { error: executionError, workflowId: workflow.id });
fullRunData.data.resultData.error = executionError;
fullRunData.data.resultData.error = {
...executionError,
message: executionError.message,
stack: executionError.stack,
} as ExecutionError;
} else {
Logger.verbose(`Workflow execution finished successfully`, { workflowId: workflow.id });
fullRunData.finished = true;

View file

@ -213,33 +213,16 @@ export const pushConnection = mixins(
const runDataExecuted = pushData.data;
let runDataExecutedErrorMessage;
const runDataExecutedErrorMessage = this.$getExecutionError(runDataExecuted.data.resultData.error);
// @ts-ignore
const workflow = this.getWorkflow();
if (runDataExecuted.finished !== true) {
// There was a problem with executing the workflow
let errorMessage = 'There was a problem executing the workflow!';
if (runDataExecuted.data.resultData.error && runDataExecuted.data.resultData.error.message) {
let nodeName: string | undefined;
if (runDataExecuted.data.resultData.error.node) {
nodeName = typeof runDataExecuted.data.resultData.error.node === 'string'
? runDataExecuted.data.resultData.error.node
: runDataExecuted.data.resultData.error.node.name;
}
const receivedError = nodeName
? `${nodeName}: ${runDataExecuted.data.resultData.error.message}`
: runDataExecuted.data.resultData.error.message;
errorMessage = `There was a problem executing the workflow:<br /><strong>"${receivedError}"</strong>`;
}
runDataExecutedErrorMessage = errorMessage;
this.$titleSet(workflow.name as string, 'ERROR');
this.$showMessage({
title: 'Problem executing workflow',
message: errorMessage,
message: runDataExecutedErrorMessage,
type: 'error',
});
} else {

View file

@ -3,6 +3,7 @@ import { ElNotificationOptions } from 'element-ui/types/notification';
import mixins from 'vue-typed-mixins';
import { externalHooks } from '@/components/mixins/externalHooks';
import { ExecutionError } from 'n8n-workflow';
export const showMessage = mixins(externalHooks).extend({
methods: {
@ -15,6 +16,27 @@ export const showMessage = mixins(externalHooks).extend({
return Notification(messageData);
},
$getExecutionError(error?: ExecutionError) {
// There was a problem with executing the workflow
let errorMessage = 'There was a problem executing the workflow!';
if (error && error.message) {
let nodeName: string | undefined;
if (error.node) {
nodeName = typeof error.node === 'string'
? error.node
: error.node.name;
}
const receivedError = nodeName
? `${nodeName}: ${error.message}`
: error.message;
errorMessage = `There was a problem executing the workflow:<br /><strong>"${receivedError}"</strong>`;
}
return errorMessage;
},
$showError(error: Error, title: string, message?: string) {
const messageLine = message ? `${message}<br/>` : '';
this.$showMessage({

View file

@ -375,6 +375,39 @@ export default mixins(
});
this.$externalHooks().run('execution.open', { workflowId: data.workflowData.id, workflowName: data.workflowData.name, executionId });
if (data.finished !== true && data.data.resultData.error) {
// Check if any node contains an error
let nodeErrorFound = false;
if (data.data.resultData.runData) {
const runData = data.data.resultData.runData;
errorCheck:
for (const nodeName of Object.keys(runData)) {
for (const taskData of runData[nodeName]) {
if (taskData.error) {
nodeErrorFound = true;
break errorCheck;
}
}
}
}
if (nodeErrorFound === false) {
const errorMessage = this.$getExecutionError(data.data.resultData.error);
this.$showMessage({
title: 'Failed execution',
message: errorMessage,
type: 'error',
});
if (data.data.resultData.error.stack) {
// Display some more information for now in console to make debugging easier
// TODO: Improve this in the future by displaying in UI
console.error(`Execution ${executionId} error:`);
console.error(data.data.resultData.error.stack);
}
}
}
},
async openWorkflowTemplate (templateId: string) {
this.setLoadingText('Loading template');