From 4cae976a3b428bd528fe71ef0b240c0fd6e23bbf Mon Sep 17 00:00:00 2001 From: Tomi Turtiainen <10324676+tomi@users.noreply.github.com> Date: Mon, 18 Dec 2023 10:53:34 +0200 Subject: [PATCH] fix(core): Consider timeout in shutdown an error (#8050) If the process doesn't shutdown within a time limit, exit with error code. 1. conceptually something timing out is an error. 2. on successful exit we close down the DB connection gracefully. On an exit timeout we rather not do that, since it will wait for any active connections to close and would possible block the exit. --- packages/cli/src/commands/BaseCommand.ts | 17 +++++++++++++++++ packages/cli/src/commands/start.ts | 7 ------- packages/cli/src/commands/webhook.ts | 6 ------ packages/cli/src/commands/worker.ts | 13 +++---------- packages/cli/src/config/schema.ts | 2 +- 5 files changed, 21 insertions(+), 24 deletions(-) diff --git a/packages/cli/src/commands/BaseCommand.ts b/packages/cli/src/commands/BaseCommand.ts index 56a82fe3e3..6f2d03c482 100644 --- a/packages/cli/src/commands/BaseCommand.ts +++ b/packages/cli/src/commands/BaseCommand.ts @@ -40,6 +40,12 @@ export abstract class BaseCommand extends Command { protected isShuttingDown = false; + /** + * How long to wait for graceful shutdown before force killing the process. + * Subclasses can override this value. + */ + protected gracefulShutdownTimeoutInS: number = 30; + async init(): Promise { await initErrorHandling(); initExpressionEvaluator(); @@ -309,9 +315,20 @@ export abstract class BaseCommand extends Command { return; } + const forceShutdownTimer = setTimeout(async () => { + // In case that something goes wrong with shutdown we + // kill after timeout no matter what + console.log(`process exited after ${this.gracefulShutdownTimeoutInS}s`); + const errorMsg = `Shutdown timed out after ${this.gracefulShutdownTimeoutInS} seconds`; + await this.exitWithCrash(errorMsg, new Error(errorMsg)); + }, this.gracefulShutdownTimeoutInS * 1000); + this.logger.info(`Received ${signal}. Shutting down...`); this.isShuttingDown = true; + await this.stopProcess(); + + clearTimeout(forceShutdownTimer); }; } } diff --git a/packages/cli/src/commands/start.ts b/packages/cli/src/commands/start.ts index b56fcfca90..0ea89dc3b6 100644 --- a/packages/cli/src/commands/start.ts +++ b/packages/cli/src/commands/start.ts @@ -103,13 +103,6 @@ export class Start extends BaseCommand { await this.externalHooks?.run('n8n.stop', []); - setTimeout(async () => { - // In case that something goes wrong with shutdown we - // kill after max. 30 seconds no matter what - console.log('process exited after 30s'); - await this.exitSuccessFully(); - }, 30000); - // Shut down License manager to unclaim any floating entitlements // Note: While this saves a new license cert to DB, the previous entitlements are still kept in memory so that the shutdown process can complete await Container.get(License).shutdown(); diff --git a/packages/cli/src/commands/webhook.ts b/packages/cli/src/commands/webhook.ts index 49bda1d53e..07374ecf03 100644 --- a/packages/cli/src/commands/webhook.ts +++ b/packages/cli/src/commands/webhook.ts @@ -41,12 +41,6 @@ export class Webhook extends BaseCommand { try { await this.externalHooks?.run('n8n.stop', []); - setTimeout(async () => { - // In case that something goes wrong with shutdown we - // kill after max. 30 seconds no matter what - await this.exitSuccessFully(); - }, 30000); - // Wait for active workflow executions to finish const activeExecutionsInstance = Container.get(ActiveExecutions); let executingWorkflows = activeExecutionsInstance.getActiveExecutions(); diff --git a/packages/cli/src/commands/worker.ts b/packages/cli/src/commands/worker.ts index 06ae362301..d868e5c323 100644 --- a/packages/cli/src/commands/worker.ts +++ b/packages/cli/src/commands/worker.ts @@ -81,21 +81,13 @@ export class Worker extends BaseCommand { try { await this.externalHooks?.run('n8n.stop', []); - const maxStopTime = config.getEnv('queue.bull.gracefulShutdownTimeout') * 1000; - - const stopTime = new Date().getTime() + maxStopTime; - - setTimeout(async () => { - // In case that something goes wrong with shutdown we - // kill after max. 30 seconds no matter what - await this.exitSuccessFully(); - }, maxStopTime); + const hardStopTime = Date.now() + this.gracefulShutdownTimeoutInS; // Wait for active workflow executions to finish let count = 0; while (Object.keys(Worker.runningJobs).length !== 0) { if (count++ % 4 === 0) { - const waitLeft = Math.ceil((stopTime - new Date().getTime()) / 1000); + const waitLeft = Math.ceil((hardStopTime - Date.now()) / 1000); this.logger.info( `Waiting for ${ Object.keys(Worker.runningJobs).length @@ -272,6 +264,7 @@ export class Worker extends BaseCommand { } async init() { + this.gracefulShutdownTimeoutInS = config.getEnv('queue.bull.gracefulShutdownTimeout'); await this.initCrashJournal(); this.logger.debug('Starting n8n worker...'); diff --git a/packages/cli/src/config/schema.ts b/packages/cli/src/config/schema.ts index bd0d3072ac..af82b2484f 100644 --- a/packages/cli/src/config/schema.ts +++ b/packages/cli/src/config/schema.ts @@ -439,7 +439,7 @@ export const schema = { env: 'QUEUE_RECOVERY_INTERVAL', }, gracefulShutdownTimeout: { - doc: 'How long should n8n wait for running executions before exiting worker process', + doc: 'How long should n8n wait for running executions before exiting worker process (seconds)', format: Number, default: 30, env: 'QUEUE_WORKER_TIMEOUT',