fix(core): Consider timeout in shutdown an error (#8050)

If the process doesn't shut down within the time limit, exit with an error
code.

1. Conceptually, something timing out is an error.
2. On a successful exit we close down the DB connection gracefully. On an
exit timeout we would rather not do that, since it would wait for any active
connections to close and could possibly block the exit.
This commit is contained in:
Tomi Turtiainen 2023-12-18 10:53:34 +02:00 committed by GitHub
parent aad57e2da1
commit 4cae976a3b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 21 additions and 24 deletions

View file

@ -40,6 +40,12 @@ export abstract class BaseCommand extends Command {
protected isShuttingDown = false;
/**
* How long to wait, in seconds, for graceful shutdown before force killing
* the process. The value is multiplied by 1000 where it is handed to
* `setTimeout`, so it must not be stored in milliseconds.
* Subclasses can override this value.
*/
protected gracefulShutdownTimeoutInS: number = 30;
async init(): Promise<void> {
await initErrorHandling();
initExpressionEvaluator();
@ -309,9 +315,20 @@ export abstract class BaseCommand extends Command {
return;
}
const forceShutdownTimer = setTimeout(async () => {
// In case that something goes wrong with shutdown we
// kill after timeout no matter what
console.log(`process exited after ${this.gracefulShutdownTimeoutInS}s`);
const errorMsg = `Shutdown timed out after ${this.gracefulShutdownTimeoutInS} seconds`;
await this.exitWithCrash(errorMsg, new Error(errorMsg));
}, this.gracefulShutdownTimeoutInS * 1000);
this.logger.info(`Received ${signal}. Shutting down...`);
this.isShuttingDown = true;
await this.stopProcess();
clearTimeout(forceShutdownTimer);
};
}
}

View file

@ -103,13 +103,6 @@ export class Start extends BaseCommand {
await this.externalHooks?.run('n8n.stop', []);
setTimeout(async () => {
// In case that something goes wrong with shutdown we
// kill after max. 30 seconds no matter what
console.log('process exited after 30s');
await this.exitSuccessFully();
}, 30000);
// Shut down License manager to unclaim any floating entitlements
// Note: While this saves a new license cert to DB, the previous entitlements are still kept in memory so that the shutdown process can complete
await Container.get(License).shutdown();

View file

@ -41,12 +41,6 @@ export class Webhook extends BaseCommand {
try {
await this.externalHooks?.run('n8n.stop', []);
setTimeout(async () => {
// In case that something goes wrong with shutdown we
// kill after max. 30 seconds no matter what
await this.exitSuccessFully();
}, 30000);
// Wait for active workflow executions to finish
const activeExecutionsInstance = Container.get(ActiveExecutions);
let executingWorkflows = activeExecutionsInstance.getActiveExecutions();

View file

@ -81,21 +81,13 @@ export class Worker extends BaseCommand {
try {
await this.externalHooks?.run('n8n.stop', []);
const maxStopTime = config.getEnv('queue.bull.gracefulShutdownTimeout') * 1000;
const stopTime = new Date().getTime() + maxStopTime;
setTimeout(async () => {
// In case that something goes wrong with shutdown we
// kill after max. 30 seconds no matter what
await this.exitSuccessFully();
}, maxStopTime);
// Deadline as an epoch timestamp in milliseconds. `gracefulShutdownTimeoutInS`
// is in seconds, so convert it before adding to `Date.now()` (milliseconds);
// otherwise the remaining-time log below reports ~0s instead of the real timeout.
const hardStopTime = Date.now() + this.gracefulShutdownTimeoutInS * 1000;
// Wait for active workflow executions to finish
let count = 0;
while (Object.keys(Worker.runningJobs).length !== 0) {
if (count++ % 4 === 0) {
const waitLeft = Math.ceil((stopTime - new Date().getTime()) / 1000);
const waitLeft = Math.ceil((hardStopTime - Date.now()) / 1000);
this.logger.info(
`Waiting for ${
Object.keys(Worker.runningJobs).length
@ -272,6 +264,7 @@ export class Worker extends BaseCommand {
}
async init() {
this.gracefulShutdownTimeoutInS = config.getEnv('queue.bull.gracefulShutdownTimeout');
await this.initCrashJournal();
this.logger.debug('Starting n8n worker...');

View file

@ -439,7 +439,7 @@ export const schema = {
env: 'QUEUE_RECOVERY_INTERVAL',
},
gracefulShutdownTimeout: {
doc: 'How long should n8n wait for running executions before exiting worker process',
doc: 'How long should n8n wait for running executions before exiting worker process (seconds)',
format: Number,
default: 30,
env: 'QUEUE_WORKER_TIMEOUT',