mirror of
https://github.com/n8n-io/n8n.git
synced 2025-01-11 12:57:29 -08:00
feat(core): Cancel runner task on timeout in external mode (#12101)
This commit is contained in:
parent
78315aca3d
commit
f18263bc8f
|
@ -15,6 +15,7 @@
|
|||
"N8N_RUNNERS_SERVER_ENABLED",
|
||||
"N8N_RUNNERS_SERVER_HOST",
|
||||
"N8N_RUNNERS_SERVER_PORT",
|
||||
"N8N_RUNNERS_TASK_TIMEOUT",
|
||||
"NODE_FUNCTION_ALLOW_BUILTIN",
|
||||
"NODE_FUNCTION_ALLOW_EXTERNAL",
|
||||
"NODE_OPTIONS",
|
||||
|
|
|
@ -43,11 +43,11 @@ export class TaskRunnersConfig {
|
|||
@Env('N8N_RUNNERS_MAX_CONCURRENCY')
|
||||
maxConcurrency: number = 5;
|
||||
|
||||
/** How long (in seconds) a task is allowed to take for completion, else the task will be aborted and the runner restarted. Must be greater than 0. */
|
||||
/** How long (in seconds) a task is allowed to take for completion, else the task will be aborted. (In internal mode, the runner will also be restarted.) Must be greater than 0. */
|
||||
@Env('N8N_RUNNERS_TASK_TIMEOUT')
|
||||
taskTimeout: number = 60;
|
||||
|
||||
/** How often (in seconds) the runner must send a heartbeat to the broker, else the task will be aborted and the runner restarted. Must be greater than 0. */
|
||||
/** How often (in seconds) the runner must send a heartbeat to the broker, else the task will be aborted. (In internal mode, the runner will also be restarted.) Must be greater than 0. */
|
||||
@Env('N8N_RUNNERS_HEARTBEAT_INTERVAL')
|
||||
heartbeatInterval: number = 30;
|
||||
}
|
||||
|
|
|
@ -37,6 +37,9 @@ export class BaseRunnerConfig {
|
|||
@Env('GENERIC_TIMEZONE')
|
||||
timezone: string = 'America/New_York';
|
||||
|
||||
@Env('N8N_RUNNERS_TASK_TIMEOUT')
|
||||
taskTimeout: number = 60;
|
||||
|
||||
@Nested
|
||||
healthcheckServer!: HealthcheckServerConfig;
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import { mock } from 'jest-mock-extended';
|
||||
import { DateTime } from 'luxon';
|
||||
import { setGlobalState, type CodeExecutionMode, type IDataObject } from 'n8n-workflow';
|
||||
import fs from 'node:fs';
|
||||
|
@ -61,7 +62,7 @@ describe('JsTaskRunner', () => {
|
|||
runner?: JsTaskRunner;
|
||||
}) => {
|
||||
jest.spyOn(runner, 'requestData').mockResolvedValue(taskData);
|
||||
return await runner.executeTask(task);
|
||||
return await runner.executeTask(task, mock<AbortSignal>());
|
||||
};
|
||||
|
||||
afterEach(() => {
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
import { ApplicationError } from 'n8n-workflow';
|
||||
|
||||
export class TaskCancelledError extends ApplicationError {
|
||||
constructor(reason: string) {
|
||||
super(`Task cancelled: ${reason}`, { level: 'warning' });
|
||||
}
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
import { ApplicationError } from 'n8n-workflow';
|
||||
|
||||
export class TimeoutError extends ApplicationError {
|
||||
description: string;
|
||||
|
||||
constructor(taskTimeout: number) {
|
||||
super(
|
||||
`Task execution timed out after ${taskTimeout} ${taskTimeout === 1 ? 'second' : 'seconds'}`,
|
||||
);
|
||||
|
||||
const subtitle = 'The task runner was taking too long on this task, so the task was aborted.';
|
||||
|
||||
const fixes = {
|
||||
optimizeScript:
|
||||
'Optimize your script to prevent long-running tasks, e.g. by processing data in smaller batches.',
|
||||
ensureTermination:
|
||||
'Ensure that all paths in your script are able to terminate, i.e. no infinite loops.',
|
||||
};
|
||||
|
||||
const suggestions = [fixes.optimizeScript, fixes.ensureTermination];
|
||||
|
||||
const suggestionsText = suggestions
|
||||
.map((suggestion, index) => `${index + 1}. ${suggestion}`)
|
||||
.join('<br/>');
|
||||
|
||||
const description = `${subtitle} You can try the following:<br/><br/>${suggestionsText}`;
|
||||
|
||||
this.description = description;
|
||||
}
|
||||
}
|
|
@ -32,6 +32,7 @@ import { BuiltInsParserState } from './built-ins-parser/built-ins-parser-state';
|
|||
import { isErrorLike } from './errors/error-like';
|
||||
import { ExecutionError } from './errors/execution-error';
|
||||
import { makeSerializable } from './errors/serializable-error';
|
||||
import { TimeoutError } from './errors/timeout-error';
|
||||
import type { RequireResolver } from './require-resolver';
|
||||
import { createRequireResolver } from './require-resolver';
|
||||
import { validateRunForAllItemsOutput, validateRunForEachItemOutput } from './result-validation';
|
||||
|
@ -94,7 +95,7 @@ export class JsTaskRunner extends TaskRunner {
|
|||
});
|
||||
}
|
||||
|
||||
async executeTask(task: Task<JSExecSettings>): Promise<TaskResultData> {
|
||||
async executeTask(task: Task<JSExecSettings>, signal: AbortSignal): Promise<TaskResultData> {
|
||||
const settings = task.settings;
|
||||
a.ok(settings, 'JS Code not sent to runner');
|
||||
|
||||
|
@ -133,8 +134,8 @@ export class JsTaskRunner extends TaskRunner {
|
|||
|
||||
const result =
|
||||
settings.nodeMode === 'runOnceForAllItems'
|
||||
? await this.runForAllItems(task.taskId, settings, data, workflow, customConsole)
|
||||
: await this.runForEachItem(task.taskId, settings, data, workflow, customConsole);
|
||||
? await this.runForAllItems(task.taskId, settings, data, workflow, customConsole, signal)
|
||||
: await this.runForEachItem(task.taskId, settings, data, workflow, customConsole, signal);
|
||||
|
||||
return {
|
||||
result,
|
||||
|
@ -183,6 +184,7 @@ export class JsTaskRunner extends TaskRunner {
|
|||
data: JsTaskData,
|
||||
workflow: Workflow,
|
||||
customConsole: CustomConsole,
|
||||
signal: AbortSignal,
|
||||
): Promise<INodeExecutionData[]> {
|
||||
const dataProxy = this.createDataProxy(data, workflow, data.itemIndex);
|
||||
const inputItems = data.connectionInputData;
|
||||
|
@ -199,10 +201,26 @@ export class JsTaskRunner extends TaskRunner {
|
|||
};
|
||||
|
||||
try {
|
||||
const result = (await runInNewContext(
|
||||
`globalThis.global = globalThis; module.exports = async function VmCodeWrapper() {${settings.code}\n}()`,
|
||||
context,
|
||||
)) as TaskResultData['result'];
|
||||
const result = await new Promise<TaskResultData['result']>((resolve, reject) => {
|
||||
const abortHandler = () => {
|
||||
reject(new TimeoutError(this.taskTimeout));
|
||||
};
|
||||
|
||||
signal.addEventListener('abort', abortHandler, { once: true });
|
||||
|
||||
const taskResult = runInNewContext(
|
||||
`globalThis.global = globalThis; module.exports = async function VmCodeWrapper() {${settings.code}\n}()`,
|
||||
context,
|
||||
{ timeout: this.taskTimeout * 1000 },
|
||||
) as Promise<TaskResultData['result']>;
|
||||
|
||||
void taskResult
|
||||
.then(resolve)
|
||||
.catch(reject)
|
||||
.finally(() => {
|
||||
signal.removeEventListener('abort', abortHandler);
|
||||
});
|
||||
});
|
||||
|
||||
if (result === null) {
|
||||
return [];
|
||||
|
@ -230,6 +248,7 @@ export class JsTaskRunner extends TaskRunner {
|
|||
data: JsTaskData,
|
||||
workflow: Workflow,
|
||||
customConsole: CustomConsole,
|
||||
signal: AbortSignal,
|
||||
): Promise<INodeExecutionData[]> {
|
||||
const inputItems = data.connectionInputData;
|
||||
const returnData: INodeExecutionData[] = [];
|
||||
|
@ -255,10 +274,26 @@ export class JsTaskRunner extends TaskRunner {
|
|||
};
|
||||
|
||||
try {
|
||||
let result = (await runInNewContext(
|
||||
`module.exports = async function VmCodeWrapper() {${settings.code}\n}()`,
|
||||
context,
|
||||
)) as INodeExecutionData | undefined;
|
||||
let result = await new Promise<INodeExecutionData | undefined>((resolve, reject) => {
|
||||
const abortHandler = () => {
|
||||
reject(new TimeoutError(this.taskTimeout));
|
||||
};
|
||||
|
||||
signal.addEventListener('abort', abortHandler);
|
||||
|
||||
const taskResult = runInNewContext(
|
||||
`module.exports = async function VmCodeWrapper() {${settings.code}\n}()`,
|
||||
context,
|
||||
{ timeout: this.taskTimeout * 1000 },
|
||||
) as Promise<INodeExecutionData>;
|
||||
|
||||
void taskResult
|
||||
.then(resolve)
|
||||
.catch(reject)
|
||||
.finally(() => {
|
||||
signal.removeEventListener('abort', abortHandler);
|
||||
});
|
||||
});
|
||||
|
||||
// Filter out null values
|
||||
if (result === null) {
|
||||
|
|
|
@ -8,6 +8,8 @@ import type { BrokerMessage, RunnerMessage } from '@/message-types';
|
|||
import { TaskRunnerNodeTypes } from '@/node-types';
|
||||
import { RPC_ALLOW_LIST, type TaskResultData } from '@/runner-types';
|
||||
|
||||
import { TaskCancelledError } from './js-task-runner/errors/task-cancelled-error';
|
||||
|
||||
export interface Task<T = unknown> {
|
||||
taskId: string;
|
||||
settings?: T;
|
||||
|
@ -21,12 +23,14 @@ export interface TaskOffer {
|
|||
}
|
||||
|
||||
interface DataRequest {
|
||||
taskId: string;
|
||||
requestId: string;
|
||||
resolve: (data: unknown) => void;
|
||||
reject: (error: unknown) => void;
|
||||
}
|
||||
|
||||
interface NodeTypesRequest {
|
||||
taskId: string;
|
||||
requestId: string;
|
||||
resolve: (data: unknown) => void;
|
||||
reject: (error: unknown) => void;
|
||||
|
@ -82,14 +86,20 @@ export abstract class TaskRunner extends EventEmitter {
|
|||
|
||||
private idleTimer: NodeJS.Timeout | undefined;
|
||||
|
||||
/** How long (in seconds) a task is allowed to take for completion, else the task will be aborted. */
|
||||
protected readonly taskTimeout: number;
|
||||
|
||||
/** How long (in seconds) a runner may be idle for before exit. */
|
||||
private readonly idleTimeout: number;
|
||||
|
||||
protected taskCancellations = new Map<Task['taskId'], AbortController>();
|
||||
|
||||
constructor(opts: TaskRunnerOpts) {
|
||||
super();
|
||||
this.taskType = opts.taskType;
|
||||
this.name = opts.name ?? 'Node.js Task Runner SDK';
|
||||
this.maxConcurrency = opts.maxConcurrency;
|
||||
this.taskTimeout = opts.taskTimeout;
|
||||
this.idleTimeout = opts.idleTimeout;
|
||||
|
||||
const wsUrl = `ws://${opts.n8nUri}/runners/_ws?id=${this.id}`;
|
||||
|
@ -208,7 +218,7 @@ export abstract class TaskRunner extends EventEmitter {
|
|||
this.offerAccepted(message.offerId, message.taskId);
|
||||
break;
|
||||
case 'broker:taskcancel':
|
||||
this.taskCancelled(message.taskId);
|
||||
this.taskCancelled(message.taskId, message.reason);
|
||||
break;
|
||||
case 'broker:tasksettings':
|
||||
void this.receivedSettings(message.taskId, message.settings);
|
||||
|
@ -283,17 +293,35 @@ export abstract class TaskRunner extends EventEmitter {
|
|||
});
|
||||
}
|
||||
|
||||
taskCancelled(taskId: string) {
|
||||
taskCancelled(taskId: string, reason: string) {
|
||||
const task = this.runningTasks.get(taskId);
|
||||
if (!task) {
|
||||
return;
|
||||
}
|
||||
task.cancelled = true;
|
||||
if (task.active) {
|
||||
// TODO
|
||||
} else {
|
||||
this.runningTasks.delete(taskId);
|
||||
|
||||
for (const [requestId, request] of this.dataRequests.entries()) {
|
||||
if (request.taskId === taskId) {
|
||||
request.reject(new TaskCancelledError(reason));
|
||||
this.dataRequests.delete(requestId);
|
||||
}
|
||||
}
|
||||
|
||||
for (const [requestId, request] of this.nodeTypesRequests.entries()) {
|
||||
if (request.taskId === taskId) {
|
||||
request.reject(new TaskCancelledError(reason));
|
||||
this.nodeTypesRequests.delete(requestId);
|
||||
}
|
||||
}
|
||||
|
||||
const controller = this.taskCancellations.get(taskId);
|
||||
if (controller) {
|
||||
controller.abort();
|
||||
this.taskCancellations.delete(taskId);
|
||||
}
|
||||
|
||||
if (!task.active) this.runningTasks.delete(taskId);
|
||||
|
||||
this.sendOffers();
|
||||
}
|
||||
|
||||
|
@ -326,20 +354,33 @@ export abstract class TaskRunner extends EventEmitter {
|
|||
this.runningTasks.delete(taskId);
|
||||
return;
|
||||
}
|
||||
|
||||
const controller = new AbortController();
|
||||
this.taskCancellations.set(taskId, controller);
|
||||
|
||||
const taskTimeout = setTimeout(() => {
|
||||
if (!task.cancelled) {
|
||||
controller.abort();
|
||||
this.taskCancellations.delete(taskId);
|
||||
}
|
||||
}, this.taskTimeout * 1_000);
|
||||
|
||||
task.settings = settings;
|
||||
task.active = true;
|
||||
try {
|
||||
const data = await this.executeTask(task);
|
||||
const data = await this.executeTask(task, controller.signal);
|
||||
this.taskDone(taskId, data);
|
||||
} catch (error) {
|
||||
this.taskErrored(taskId, error);
|
||||
if (!task.cancelled) this.taskErrored(taskId, error);
|
||||
} finally {
|
||||
clearTimeout(taskTimeout);
|
||||
this.taskCancellations.delete(taskId);
|
||||
this.resetIdleTimer();
|
||||
}
|
||||
}
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/naming-convention
|
||||
async executeTask(_task: Task): Promise<TaskResultData> {
|
||||
async executeTask(_task: Task, _signal: AbortSignal): Promise<TaskResultData> {
|
||||
throw new ApplicationError('Unimplemented');
|
||||
}
|
||||
|
||||
|
@ -352,6 +393,7 @@ export abstract class TaskRunner extends EventEmitter {
|
|||
const nodeTypesPromise = new Promise<T>((resolve, reject) => {
|
||||
this.nodeTypesRequests.set(requestId, {
|
||||
requestId,
|
||||
taskId,
|
||||
resolve: resolve as (data: unknown) => void,
|
||||
reject,
|
||||
});
|
||||
|
@ -380,6 +422,7 @@ export abstract class TaskRunner extends EventEmitter {
|
|||
const p = new Promise<T>((resolve, reject) => {
|
||||
this.dataRequests.set(requestId, {
|
||||
requestId,
|
||||
taskId,
|
||||
resolve: resolve as (data: unknown) => void,
|
||||
reject,
|
||||
});
|
||||
|
|
|
@ -6,6 +6,7 @@ import { ApplicationError, type INodeTypeBaseDescription } from 'n8n-workflow';
|
|||
import { Time } from '@/constants';
|
||||
|
||||
import { TaskRejectError } from '../errors';
|
||||
import { TaskRunnerTimeoutError } from '../errors/task-runner-timeout.error';
|
||||
import type { RunnerLifecycleEvents } from '../runner-lifecycle-events';
|
||||
import { TaskBroker } from '../task-broker.service';
|
||||
import type { TaskOffer, TaskRequest, TaskRunner } from '../task-broker.service';
|
||||
|
@ -721,7 +722,7 @@ describe('TaskBroker', () => {
|
|||
|
||||
beforeAll(() => {
|
||||
jest.useFakeTimers();
|
||||
config = mock<TaskRunnersConfig>({ taskTimeout: 30 });
|
||||
config = mock<TaskRunnersConfig>({ taskTimeout: 30, mode: 'internal' });
|
||||
taskBroker = new TaskBroker(mock(), config, runnerLifecycleEvents);
|
||||
});
|
||||
|
||||
|
@ -800,7 +801,7 @@ describe('TaskBroker', () => {
|
|||
expect(taskBroker.getTasks().get(taskId)).toBeUndefined();
|
||||
});
|
||||
|
||||
it('on timeout, we should emit `runner:timed-out-during-task` event and send error to requester', async () => {
|
||||
it('[internal mode] on timeout, we should emit `runner:timed-out-during-task` event and send error to requester', async () => {
|
||||
jest.spyOn(global, 'clearTimeout');
|
||||
|
||||
const taskId = 'task1';
|
||||
|
@ -839,5 +840,50 @@ describe('TaskBroker', () => {
|
|||
|
||||
expect(taskBroker.getTasks().get(taskId)).toBeUndefined();
|
||||
});
|
||||
|
||||
it('[external mode] on timeout, we should instruct the runner to cancel and send error to requester', async () => {
|
||||
const config = mock<TaskRunnersConfig>({ taskTimeout: 30, mode: 'external' });
|
||||
taskBroker = new TaskBroker(mock(), config, runnerLifecycleEvents);
|
||||
|
||||
jest.spyOn(global, 'clearTimeout');
|
||||
|
||||
const taskId = 'task1';
|
||||
const runnerId = 'runner1';
|
||||
const requesterId = 'requester1';
|
||||
const runner = mock<TaskRunner>({ id: runnerId });
|
||||
const runnerCallback = jest.fn();
|
||||
const requesterCallback = jest.fn();
|
||||
|
||||
taskBroker.registerRunner(runner, runnerCallback);
|
||||
taskBroker.registerRequester(requesterId, requesterCallback);
|
||||
|
||||
taskBroker.setTasks({
|
||||
[taskId]: { id: taskId, runnerId, requesterId, taskType: 'test' },
|
||||
});
|
||||
|
||||
await taskBroker.sendTaskSettings(taskId, {});
|
||||
runnerCallback.mockClear();
|
||||
|
||||
jest.runAllTimers();
|
||||
|
||||
await Promise.resolve(); // for timeout callback
|
||||
await Promise.resolve(); // for sending messages to runner and requester
|
||||
await Promise.resolve(); // for task cleanup and removal
|
||||
|
||||
expect(runnerCallback).toHaveBeenLastCalledWith({
|
||||
type: 'broker:taskcancel',
|
||||
taskId,
|
||||
reason: 'Task execution timed out',
|
||||
});
|
||||
|
||||
expect(requesterCallback).toHaveBeenCalledWith({
|
||||
type: 'broker:taskerror',
|
||||
taskId,
|
||||
error: expect.any(TaskRunnerTimeoutError),
|
||||
});
|
||||
|
||||
expect(clearTimeout).toHaveBeenCalled();
|
||||
expect(taskBroker.getTasks().get(taskId)).toBeUndefined();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -1,15 +1,23 @@
|
|||
import type { TaskRunnerMode } from '@n8n/config/src/configs/runners.config';
|
||||
import { ApplicationError } from 'n8n-workflow';
|
||||
|
||||
export class TaskRunnerTimeoutError extends ApplicationError {
|
||||
description: string;
|
||||
|
||||
constructor(taskTimeout: number, isSelfHosted: boolean) {
|
||||
constructor({
|
||||
taskTimeout,
|
||||
isSelfHosted,
|
||||
mode,
|
||||
}: { taskTimeout: number; isSelfHosted: boolean; mode: TaskRunnerMode }) {
|
||||
super(
|
||||
`Task execution timed out after ${taskTimeout} ${taskTimeout === 1 ? 'second' : 'seconds'}`,
|
||||
);
|
||||
|
||||
const subtitle =
|
||||
'The task runner was taking too long on this task, so it was suspected of being unresponsive and restarted, and the task was aborted. You can try the following:';
|
||||
const subtitles = {
|
||||
internal:
|
||||
'The task runner was taking too long on this task, so it was suspected of being unresponsive and restarted, and the task was aborted.',
|
||||
external: 'The task runner was taking too long on this task, so the task was aborted.',
|
||||
};
|
||||
|
||||
const fixes = {
|
||||
optimizeScript:
|
||||
|
@ -27,7 +35,7 @@ export class TaskRunnerTimeoutError extends ApplicationError {
|
|||
.map((suggestion, index) => `${index + 1}. ${suggestion}`)
|
||||
.join('<br/>');
|
||||
|
||||
const description = `${subtitle}<br/><br/>${suggestionsText}`;
|
||||
const description = `${mode === 'internal' ? subtitles.internal : subtitles.external} You can try the following:<br/><br/>${suggestionsText}`;
|
||||
|
||||
this.description = description;
|
||||
}
|
||||
|
|
|
@ -459,14 +459,25 @@ export class TaskBroker {
|
|||
const task = this.tasks.get(taskId);
|
||||
if (!task) return;
|
||||
|
||||
this.runnerLifecycleEvents.emit('runner:timed-out-during-task');
|
||||
if (this.taskRunnersConfig.mode === 'internal') {
|
||||
this.runnerLifecycleEvents.emit('runner:timed-out-during-task');
|
||||
} else if (this.taskRunnersConfig.mode === 'external') {
|
||||
await this.messageRunner(task.runnerId, {
|
||||
type: 'broker:taskcancel',
|
||||
taskId,
|
||||
reason: 'Task execution timed out',
|
||||
});
|
||||
}
|
||||
|
||||
const { taskTimeout, mode } = this.taskRunnersConfig;
|
||||
|
||||
await this.taskErrorHandler(
|
||||
taskId,
|
||||
new TaskRunnerTimeoutError(
|
||||
this.taskRunnersConfig.taskTimeout,
|
||||
config.getEnv('deployment.type') !== 'cloud',
|
||||
),
|
||||
new TaskRunnerTimeoutError({
|
||||
taskTimeout,
|
||||
isSelfHosted: config.getEnv('deployment.type') !== 'cloud',
|
||||
mode,
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue