mirror of
https://github.com/n8n-io/n8n.git
synced 2025-01-25 11:31:38 -08:00
fix(core): Handle Redis disconnects gracefully (#11007)
Some checks are pending
Test Master / install-and-build (push) Waiting to run
Test Master / Unit tests (18.x) (push) Blocked by required conditions
Test Master / Unit tests (20.x) (push) Blocked by required conditions
Test Master / Unit tests (22.4) (push) Blocked by required conditions
Test Master / Lint (push) Blocked by required conditions
Test Master / Notify Slack on failure (push) Blocked by required conditions
Benchmark Docker Image CI / build (push) Waiting to run
Some checks are pending
Test Master / install-and-build (push) Waiting to run
Test Master / Unit tests (18.x) (push) Blocked by required conditions
Test Master / Unit tests (20.x) (push) Blocked by required conditions
Test Master / Unit tests (22.4) (push) Blocked by required conditions
Test Master / Lint (push) Blocked by required conditions
Test Master / Notify Slack on failure (push) Blocked by required conditions
Benchmark Docker Image CI / build (push) Waiting to run
This commit is contained in:
parent
805a1140c9
commit
cd916480c2
37
packages/cli/src/decorators/debounce.ts
Normal file
37
packages/cli/src/decorators/debounce.ts
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
import debounce from 'lodash/debounce';
|
||||||
|
|
||||||
|
/**
 * Debounce a class method using `lodash/debounce`.
 *
 * The decorated method is replaced by an accessor. The first time the method
 * is read through an object, a debounced wrapper is created for that object
 * and pinned onto it as a non-configurable data property, so every later read
 * on the same object gets the same wrapper (and therefore the same timer).
 *
 * Reading an instance method directly off the prototype (for example
 * `MyClass.prototype.myMethod`) returns the original, undebounced method and
 * caches nothing. Without this, a single wrapper would be frozen onto the
 * prototype and every instance would share one debounce timer.
 *
 * @param waitMs - Number of milliseconds to debounce method by.
 * @returns A method decorator that swaps the method for the lazy accessor.
 *
 * @example
 * ```
 * class MyClass {
 *   @Debounce(1000)
 *   async myMethod() {
 *     // debounced
 *   }
 * }
 * ```
 */
export const Debounce =
	(waitMs: number): MethodDecorator =>
	<T>(
		target: object,
		methodName: string | symbol,
		originalDescriptor: PropertyDescriptor,
	): TypedPropertyDescriptor<T> => ({
		// Must stay configurable so the per-object data property defined in the
		// getter can shadow this accessor.
		configurable: true,

		get() {
			const receiver: unknown = this;

			// For instance methods `target` is the prototype; for static methods it
			// is the constructor (a function). Only the former must not be cached on.
			const isPrototypeRead = receiver === target && typeof target !== 'function';
			if (isPrototypeRead) return originalDescriptor.value as T;

			const debouncedFn = debounce(originalDescriptor.value, waitMs);

			Object.defineProperty(this, methodName, {
				configurable: false,
				value: debouncedFn,
			});

			return debouncedFn as T;
		},
	});
|
|
@ -24,8 +24,6 @@ export class Publisher {
|
||||||
if (config.getEnv('executions.mode') !== 'queue') return;
|
if (config.getEnv('executions.mode') !== 'queue') return;
|
||||||
|
|
||||||
this.client = this.redisClientService.createClient({ type: 'publisher(n8n)' });
|
this.client = this.redisClientService.createClient({ type: 'publisher(n8n)' });
|
||||||
|
|
||||||
this.client.on('error', (error) => this.logger.error(error.message));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
getClient() {
|
getClient() {
|
||||||
|
|
|
@ -27,8 +27,6 @@ export class Subscriber {
|
||||||
|
|
||||||
this.client = this.redisClientService.createClient({ type: 'subscriber(n8n)' });
|
this.client = this.redisClientService.createClient({ type: 'subscriber(n8n)' });
|
||||||
|
|
||||||
this.client.on('error', (error) => this.logger.error(error.message));
|
|
||||||
|
|
||||||
this.client.on('message', (channel: PubSub.Channel, message) => {
|
this.client.on('message', (channel: PubSub.Channel, message) => {
|
||||||
this.handlers.get(channel)?.(message);
|
this.handlers.get(channel)?.(message);
|
||||||
});
|
});
|
||||||
|
|
|
@ -173,39 +173,11 @@ export class ScalingService {
|
||||||
// #region Listeners
|
// #region Listeners
|
||||||
|
|
||||||
private registerListeners() {
|
private registerListeners() {
|
||||||
let latestAttemptTs = 0;
|
|
||||||
let cumulativeTimeoutMs = 0;
|
|
||||||
|
|
||||||
const MAX_TIMEOUT_MS = this.globalConfig.queue.bull.redis.timeoutThreshold;
|
|
||||||
const RESET_LENGTH_MS = 30_000;
|
|
||||||
|
|
||||||
this.queue.on('error', (error: Error) => {
|
this.queue.on('error', (error: Error) => {
|
||||||
|
if ('code' in error && error.code === 'ECONNREFUSED') return; // handled by RedisClientService.retryStrategy
|
||||||
|
|
||||||
this.logger.error('[ScalingService] Queue errored', { error });
|
this.logger.error('[ScalingService] Queue errored', { error });
|
||||||
|
|
||||||
/**
|
|
||||||
* On Redis connection failure, try to reconnect. On every failed attempt,
|
|
||||||
* increment a cumulative timeout - if this exceeds a limit, exit the
|
|
||||||
* process. Reset the cumulative timeout if >30s between retries.
|
|
||||||
*/
|
|
||||||
if (error.message.includes('ECONNREFUSED')) {
|
|
||||||
const nowTs = Date.now();
|
|
||||||
if (nowTs - latestAttemptTs > RESET_LENGTH_MS) {
|
|
||||||
latestAttemptTs = nowTs;
|
|
||||||
cumulativeTimeoutMs = 0;
|
|
||||||
} else {
|
|
||||||
cumulativeTimeoutMs += nowTs - latestAttemptTs;
|
|
||||||
latestAttemptTs = nowTs;
|
|
||||||
if (cumulativeTimeoutMs > MAX_TIMEOUT_MS) {
|
|
||||||
this.logger.error('[ScalingService] Redis unavailable after max timeout');
|
|
||||||
this.logger.error('[ScalingService] Exiting process...');
|
|
||||||
process.exit(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
this.logger.warn('[ScalingService] Redis unavailable - retrying to connect...');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
throw error;
|
throw error;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -3,18 +3,42 @@ import ioRedis from 'ioredis';
|
||||||
import type { Cluster, RedisOptions } from 'ioredis';
|
import type { Cluster, RedisOptions } from 'ioredis';
|
||||||
import { Service } from 'typedi';
|
import { Service } from 'typedi';
|
||||||
|
|
||||||
|
import { Debounce } from '@/decorators/debounce';
|
||||||
import { Logger } from '@/logger';
|
import { Logger } from '@/logger';
|
||||||
|
import { TypedEmitter } from '@/typed-emitter';
|
||||||
|
|
||||||
import type { RedisClientType } from '../scaling/redis/redis.types';
|
import type { RedisClientType } from '../scaling/redis/redis.types';
|
||||||
|
|
||||||
|
/** Events emitted by `RedisClientService`, keyed by event name, with the payload each one carries. */
type RedisEventMap = {
	/** Payload: the cumulative time (in ms) spent trying to reconnect so far. */
	'connection-lost': number;

	/** Emitted without a payload. */
	'connection-recovered': never;
};
|
||||||
|
|
||||||
@Service()
|
@Service()
|
||||||
export class RedisClientService {
|
export class RedisClientService extends TypedEmitter<RedisEventMap> {
|
||||||
private readonly clients = new Set<ioRedis | Cluster>();
|
private readonly clients = new Set<ioRedis | Cluster>();
|
||||||
|
|
||||||
|
private readonly config = {
|
||||||
|
/** How long (in ms) to try to reconnect for before exiting. */
|
||||||
|
maxTimeout: this.globalConfig.queue.bull.redis.timeoutThreshold,
|
||||||
|
|
||||||
|
/** How long (in ms) to wait between reconnection attempts. */
|
||||||
|
retryInterval: 1000,
|
||||||
|
|
||||||
|
/** How long (in ms) to wait before resetting the cumulative timeout. */
|
||||||
|
resetLength: 30_000,
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Whether any client has lost connection to Redis. */
|
||||||
|
private lostConnection = false;
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private readonly logger: Logger,
|
private readonly logger: Logger,
|
||||||
private readonly globalConfig: GlobalConfig,
|
private readonly globalConfig: GlobalConfig,
|
||||||
) {}
|
) {
|
||||||
|
super();
|
||||||
|
this.registerListeners();
|
||||||
|
}
|
||||||
|
|
||||||
createClient(arg: { type: RedisClientType; extraOptions?: RedisOptions }) {
|
createClient(arg: { type: RedisClientType; extraOptions?: RedisOptions }) {
|
||||||
const client =
|
const client =
|
||||||
|
@ -22,6 +46,19 @@ export class RedisClientService {
|
||||||
? this.createClusterClient(arg)
|
? this.createClusterClient(arg)
|
||||||
: this.createRegularClient(arg);
|
: this.createRegularClient(arg);
|
||||||
|
|
||||||
|
client.on('error', (error) => {
|
||||||
|
if ('code' in error && error.code === 'ECONNREFUSED') return; // handled by retryStrategy
|
||||||
|
|
||||||
|
this.logger.error(`[Redis client] ${error.message}`, { error });
|
||||||
|
});
|
||||||
|
|
||||||
|
client.on('ready', () => {
|
||||||
|
if (this.lostConnection) {
|
||||||
|
this.emit('connection-recovered');
|
||||||
|
this.lostConnection = false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
this.clients.add(client);
|
this.clients.add(client);
|
||||||
|
|
||||||
return client;
|
return client;
|
||||||
|
@ -118,32 +155,29 @@ export class RedisClientService {
|
||||||
* Reset the cumulative timeout if >30s between reconnection attempts.
|
* Reset the cumulative timeout if >30s between reconnection attempts.
|
||||||
*/
|
*/
|
||||||
private retryStrategy() {
|
private retryStrategy() {
|
||||||
const RETRY_INTERVAL = 500; // ms
|
|
||||||
const RESET_LENGTH = 30_000; // ms
|
|
||||||
const MAX_TIMEOUT = this.globalConfig.queue.bull.redis.timeoutThreshold;
|
|
||||||
|
|
||||||
let lastAttemptTs = 0;
|
let lastAttemptTs = 0;
|
||||||
let cumulativeTimeout = 0;
|
let cumulativeTimeout = 0;
|
||||||
|
|
||||||
return () => {
|
return () => {
|
||||||
const nowTs = Date.now();
|
const nowTs = Date.now();
|
||||||
|
|
||||||
if (nowTs - lastAttemptTs > RESET_LENGTH) {
|
if (nowTs - lastAttemptTs > this.config.resetLength) {
|
||||||
cumulativeTimeout = 0;
|
cumulativeTimeout = 0;
|
||||||
lastAttemptTs = nowTs;
|
lastAttemptTs = nowTs;
|
||||||
} else {
|
} else {
|
||||||
cumulativeTimeout += nowTs - lastAttemptTs;
|
cumulativeTimeout += nowTs - lastAttemptTs;
|
||||||
lastAttemptTs = nowTs;
|
lastAttemptTs = nowTs;
|
||||||
if (cumulativeTimeout > MAX_TIMEOUT) {
|
if (cumulativeTimeout > this.config.maxTimeout) {
|
||||||
this.logger.error(`[Redis] Unable to connect after max timeout of ${MAX_TIMEOUT} ms`);
|
const maxTimeout = Math.round(this.config.maxTimeout / 1000) + 's';
|
||||||
this.logger.error('Exiting process...');
|
this.logger.error(`Unable to connect to Redis after trying to connect for ${maxTimeout}`);
|
||||||
|
this.logger.error('Exiting process due to Redis connection error');
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this.logger.warn('Redis unavailable - trying to reconnect...');
|
this.emit('connection-lost', cumulativeTimeout);
|
||||||
|
|
||||||
return RETRY_INTERVAL;
|
return this.config.retryInterval;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,4 +190,40 @@ export class RedisClientService {
|
||||||
return { host, port: parseInt(port) };
|
return { host, port: parseInt(port) };
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Forward a connection event to the underlying emitter, debounced by one
 * second so that several clients reporting at once produce a single event.
 *
 * NOTE(review): the debounce wraps every call regardless of event name, so a
 * `connection-lost` and a `connection-recovered` emitted within the same
 * window presumably collapse into whichever came last, and the declared
 * `boolean` result is whatever the debounced wrapper returns - confirm
 * against the `lodash/debounce` documentation.
 */
@Debounce(1000)
emit<Event extends keyof RedisEventMap>(
	event: Event,
	...args: Array<RedisEventMap[Event]>
): boolean {
	const result = super.emit(event, ...args);

	return result;
}
|
||||||
|
|
||||||
|
/**
 * Subscribe this service to its own connection events:
 * - `connection-lost`: log a warning with the retry interval and the
 *   elapsed/maximum reconnection time, then flag the connection as lost.
 * - `connection-recovered`: log that the connection is back.
 */
private registerListeners() {
	// The configured durations do not change per event, so format them once.
	const retryInterval = this.formatTimeout(this.config.retryInterval);
	const maxTimeout = this.formatTimeout(this.config.maxTimeout);

	const onConnectionLost = (elapsedMs: number) => {
		const cumulativeTimeout = this.formatTimeout(elapsedMs);
		const reconnectionMsg = `Trying to reconnect in ${retryInterval}...`;
		const timeoutDetails = `${cumulativeTimeout}/${maxTimeout}`;

		this.logger.warn(`Lost Redis connection. ${reconnectionMsg} (${timeoutDetails})`);

		this.lostConnection = true;
	};

	const onConnectionRecovered = () => {
		this.logger.info('Recovered Redis connection');
	};

	this.on('connection-lost', onConnectionLost);
	this.on('connection-recovered', onConnectionRecovered);
}
|
||||||
|
|
||||||
|
/**
 * Render a millisecond duration as seconds, rounded to one decimal place and
 * suffixed with `s` (e.g. `1500` becomes `1.5s`).
 *
 * @param timeoutMs - Duration in milliseconds.
 */
private formatTimeout(timeoutMs: number) {
	// Round in tenths of a second, then scale back down to seconds.
	const tenthsOfSecond = Math.round((timeoutMs / 1000) * 10);

	return `${tenthsOfSecond / 10}s`;
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue