refactor(core): Remove watchdog interval (#11295)

Iván Ovejero 2024-10-17 13:51:56 +02:00 committed by GitHub
parent c79aa01a48
commit 83ca7f8e90
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 12 additions and 57 deletions


@@ -82,10 +82,6 @@ class BullConfig {
 	@Nested
 	redis: RedisConfig;
 
-	/** How often (in seconds) to poll the Bull queue to identify executions finished during a Redis crash. `0` to disable. May increase Redis traffic significantly. */
-	@Env('QUEUE_RECOVERY_INTERVAL')
-	queueRecoveryInterval: number = 60; // watchdog interval
-
 	/** @deprecated How long (in seconds) a worker must wait for active executions to finish before exiting. Use `N8N_GRACEFUL_SHUTDOWN_TIMEOUT` instead */
 	@Env('QUEUE_WORKER_TIMEOUT')
 	gracefulShutdownTimeout: number = 30;
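For context, the removed `queueRecoveryInterval` field followed the same `@Env` pattern as the surrounding settings: a numeric value sourced from an environment variable, defaulting to 60 seconds, with `0` meaning disabled. A minimal sketch of that env-to-number mapping, purely as an illustration (the `readNumberEnv` helper is hypothetical and is not the actual `@n8n/config` decorator implementation):

```ts
// Hypothetical helper mirroring the semantics described in the removed doc comment.
// Not part of n8n; the real mapping is handled by the @Env decorator in @n8n/config.
function readNumberEnv(name: string, defaultValue: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === '') return defaultValue;
  const parsed = Number(raw);
  return Number.isNaN(parsed) ? defaultValue : parsed;
}

// Before this commit: poll every 60s by default; 0 disabled the watchdog entirely.
const queueRecoveryInterval = readNumberEnv('QUEUE_RECOVERY_INTERVAL', 60);
const watchdogEnabled = queueRecoveryInterval > 0;
```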


@@ -211,7 +211,6 @@ describe('GlobalConfig', () => {
 				clusterNodes: '',
 				tls: false,
 			},
-			queueRecoveryInterval: 60,
 			gracefulShutdownTimeout: 30,
 			prefix: 'bull',
 			settings: {


@@ -2,6 +2,16 @@
 
 This list shows all the versions which include breaking changes and how to upgrade.
 
+# 1.65.0
+
+### What changed?
+
+Queue polling via the env var `QUEUE_RECOVERY_INTERVAL` has been removed.
+
+### When is action necessary?
+
+If you have set the env var `QUEUE_RECOVERY_INTERVAL`, you can remove it, as it no longer has any effect.
+
 # 1.63.0
 
 ### What changed?
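Since the variable is now ignored rather than rejected, upgrades will not fail if it is still set. A small optional check, not part of n8n, that you could run before upgrading to spot a leftover setting:

```ts
// Optional pre-upgrade check (illustration only, not shipped with n8n):
// warn if the now-ignored variable is still present so it can be removed.
if (process.env.QUEUE_RECOVERY_INTERVAL !== undefined) {
  console.warn(
    'QUEUE_RECOVERY_INTERVAL is set but has no effect since n8n 1.65.0; remove it from your environment.',
  );
}
```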


@@ -2,7 +2,6 @@
 /* eslint-disable @typescript-eslint/no-unsafe-member-access */
 /* eslint-disable @typescript-eslint/no-shadow */
 /* eslint-disable @typescript-eslint/no-unsafe-assignment */
-import { GlobalConfig } from '@n8n/config';
 import { InstanceSettings, WorkflowExecute } from 'n8n-core';
 import type {
 	ExecutionError,
@@ -30,7 +29,7 @@ import { ExternalHooks } from '@/external-hooks';
 import { Logger } from '@/logging/logger.service';
 import { NodeTypes } from '@/node-types';
 import type { ScalingService } from '@/scaling/scaling.service';
-import type { Job, JobData, JobResult } from '@/scaling/scaling.types';
+import type { Job, JobData } from '@/scaling/scaling.types';
 import { PermissionChecker } from '@/user-management/permission-checker';
 import * as WorkflowExecuteAdditionalData from '@/workflow-execute-additional-data';
 import * as WorkflowHelpers from '@/workflow-helpers';
@@ -439,54 +438,8 @@ export class WorkflowRunner {
       reject(error);
     });
 
-    const jobData: Promise<JobResult> = job.finished();
-
-    const { queueRecoveryInterval } = Container.get(GlobalConfig).queue.bull;
-
-    const racingPromises: Array<Promise<JobResult>> = [jobData];
-
-    let clearWatchdogInterval;
-
-    if (queueRecoveryInterval > 0) {
-      /** ***********************************************
-       * Long explanation about what this solves:      *
-       * This only happens in a very specific scenario *
-       * when Redis crashes and recovers shortly       *
-       * but during this time, some execution(s)       *
-       * finished. The end result is that the main     *
-       * process will wait indefinitely and never      *
-       * get a response. This adds an active polling to*
-       * the queue that allows us to identify that the *
-       * execution finished and get information from   *
-       * the database.                                 *
-       ************************************************ */
-      let watchDogInterval: NodeJS.Timeout | undefined;
-      const watchDog: Promise<JobResult> = new Promise((res) => {
-        watchDogInterval = setInterval(async () => {
-          const currentJob = await this.scalingService.getJob(job.id);
-          // When null means job is finished (not found in queue)
-          if (currentJob === null) {
-            // Mimic worker's success message
-            res({ success: true });
-          }
-        }, queueRecoveryInterval * 1000);
-      });
-
-      racingPromises.push(watchDog);
-
-      clearWatchdogInterval = () => {
-        if (watchDogInterval) {
-          clearInterval(watchDogInterval);
-          watchDogInterval = undefined;
-        }
-      };
-    }
-
     try {
-      await Promise.race(racingPromises);
-      if (clearWatchdogInterval !== undefined) {
-        clearWatchdogInterval();
-      }
+      await job.finished();
     } catch (error) {
       // We use "getWorkflowHooksWorkerExecuter" as "getWorkflowHooksWorkerMain" does not contain the
       // "workflowExecuteAfter" which we require.
@@ -497,9 +450,6 @@ export class WorkflowRunner {
         { retryOf: data.retryOf ? data.retryOf.toString() : undefined },
       );
       this.logger.error(`Problem with execution ${executionId}: ${error.message}. Aborting.`);
-      if (clearWatchdogInterval !== undefined) {
-        clearWatchdogInterval();
-      }
       await this.processError(error, new Date(), data.executionMode, executionId, hooks);
       reject(error);
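For reference, the deleted block raced Bull's `job.finished()` promise against a periodic poll of the queue, so that an execution which completed while Redis was briefly down would still unblock the main process; after this commit the runner simply awaits `job.finished()`. A stripped-down sketch of the removed pattern (illustration only: the structural `BullQueue`/`BullJob` interfaces below stand in for Bull's real types, and in n8n the lookup went through `scalingService.getJob` with the interval taken from `QUEUE_RECOVERY_INTERVAL`):

```ts
// Minimal structural stand-ins for Bull's Queue and Job types (illustration only).
interface BullJob {
  id: string;
  finished(): Promise<unknown>; // resolves when the worker reports the job done
}
interface BullQueue {
  getJob(id: string): Promise<BullJob | null>; // null once the job is gone from the queue
}

// Sketch of the removed watchdog: resolve either when Bull reports the job
// finished, or when polling shows the job can no longer be found in the queue.
async function waitForJob(queue: BullQueue, job: BullJob, pollSeconds: number): Promise<unknown> {
  const finished = job.finished();

  let timer: NodeJS.Timeout | undefined;
  const watchdog = new Promise<{ success: boolean }>((resolve) => {
    timer = setInterval(async () => {
      const current = await queue.getJob(job.id);
      if (current === null) resolve({ success: true }); // job finished during a Redis outage
    }, pollSeconds * 1000);
  });

  try {
    return await Promise.race([finished, watchdog]);
  } finally {
    if (timer) clearInterval(timer);
  }
}
```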