From eabf1609577cd94a6bad5020c34378d840a13bc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Ovejero?= Date: Mon, 27 Jan 2025 13:44:20 +0100 Subject: [PATCH] fix(core): Handle max stalled count error better (#12824) --- packages/cli/src/errors/max-stalled-count.error.ts | 11 +++++++---- packages/cli/src/scaling/scaling.service.ts | 5 ----- packages/cli/src/workflow-runner.ts | 10 ++++++++++ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/packages/cli/src/errors/max-stalled-count.error.ts b/packages/cli/src/errors/max-stalled-count.error.ts index 653ca18eac..38f73023a7 100644 --- a/packages/cli/src/errors/max-stalled-count.error.ts +++ b/packages/cli/src/errors/max-stalled-count.error.ts @@ -5,9 +5,12 @@ import { ApplicationError } from 'n8n-workflow'; */ export class MaxStalledCountError extends ApplicationError { constructor(cause: Error) { - super('The execution has reached the maximum number of attempts and will no longer retry.', { - level: 'warning', - cause, - }); + super( + 'This execution failed to be processed too many times and will no longer retry. To allow this execution to complete, please break down your workflow or scale up your workers or adjust your worker settings.', + { + level: 'warning', + cause, + }, + ); } } diff --git a/packages/cli/src/scaling/scaling.service.ts b/packages/cli/src/scaling/scaling.service.ts index f20d0764c6..7c48ce57e2 100644 --- a/packages/cli/src/scaling/scaling.service.ts +++ b/packages/cli/src/scaling/scaling.service.ts @@ -17,7 +17,6 @@ import config from '@/config'; import { HIGHEST_SHUTDOWN_PRIORITY, Time } from '@/constants'; import { ExecutionRepository } from '@/databases/repositories/execution.repository'; import { OnShutdown } from '@/decorators/on-shutdown'; -import { MaxStalledCountError } from '@/errors/max-stalled-count.error'; import { EventService } from '@/events/event.service'; import { OrchestrationService } from '@/services/orchestration.service'; import { assertNever } from '@/utils'; @@ -271,10 +270,6 @@ export class ScalingService { this.queue.on('error', (error: Error) => { if ('code' in error && error.code === 'ECONNREFUSED') return; // handled by RedisClientService.retryStrategy - if (error.message.includes('job stalled more than maxStalledCount')) { - throw new MaxStalledCountError(error); - } - /** * Non-recoverable error on worker start with Redis unavailable. * Even if Redis recovers, worker will remain unable to process jobs. diff --git a/packages/cli/src/workflow-runner.ts b/packages/cli/src/workflow-runner.ts index 148df7edcd..5d80459d93 100644 --- a/packages/cli/src/workflow-runner.ts +++ b/packages/cli/src/workflow-runner.ts @@ -37,6 +37,8 @@ import * as WorkflowExecuteAdditionalData from '@/workflow-execute-additional-da import { generateFailedExecutionFromError } from '@/workflow-helpers'; import { WorkflowStaticDataService } from '@/workflows/workflow-static-data.service'; +import { MaxStalledCountError } from './errors/max-stalled-count.error'; + @Service() export class WorkflowRunner { private scalingService: ScalingService; @@ -416,6 +418,13 @@ export class WorkflowRunner { try { await job.finished(); } catch (error) { + if ( + error instanceof Error && + error.message.includes('job stalled more than maxStalledCount') + ) { + error = new MaxStalledCountError(error); + } + // We use "getWorkflowHooksWorkerExecuter" as "getWorkflowHooksWorkerMain" does not contain the // "workflowExecuteAfter" which we require. const hooks = getWorkflowHooksWorkerExecuter( @@ -424,6 +433,7 @@ export class WorkflowRunner { data.workflowData, { retryOf: data.retryOf ? data.retryOf.toString() : undefined }, ); + await this.processError(error, new Date(), data.executionMode, executionId, hooks); reject(error);