2023-08-04 11:51:07 -07:00
|
|
|
import config from '@/config';
|
|
|
|
import { N8N_VERSION } from '@/constants';
|
|
|
|
import type express from 'express';
|
|
|
|
import promBundle from 'express-prom-bundle';
|
|
|
|
import promClient, { type Counter } from 'prom-client';
|
|
|
|
import semverParse from 'semver/functions/parse';
|
|
|
|
import { Service } from 'typedi';
|
|
|
|
import EventEmitter from 'events';
|
|
|
|
|
perf(core): Improve caching service (#8213)
Story: https://linear.app/n8n/issue/PAY-1188
- Implement Redis hashes on the caching service, based on Micha's work
in #7747, adapted from `node-cache-manager-ioredis-yet`. Optimize
workflow ownership lookups and manual webhook lookups with Redis hashes.
- Simplify the caching service by removing all currently unused methods
and options: `enable`, `disable`, `getCache`, `keys`, `keyValues`,
`refreshFunctionEach`, `refreshFunctionMany`, `refreshTtl`, etc.
- Remove the flag `N8N_CACHE_ENABLED`. Currently some features on
`master` are broken with caching disabled, and test webhooks now rely
entirely on caching, for multi-main setup support. We originally
introduced this flag to protect against excessive memory usage, but
total cache usage is low enough that we decided to drop this setting.
Apparently this flag was also never documented.
- Overall caching service refactor: use generics, reduce branching, add
discriminants for cache kinds for better type safety, type caching
events, improve readability, remove outdated docs, etc. Also refactor
and expand caching service tests.
Follow-up to: https://github.com/n8n-io/n8n/pull/8176
---------
Co-authored-by: Michael Auerswald <michael.auerswald@gmail.com>
2024-01-05 02:52:44 -08:00
|
|
|
import { CacheService } from '@/services/cache/cache.service';
|
2024-06-11 00:11:39 -07:00
|
|
|
import { type EventMessageTypes } from '@/eventbus';
|
2024-02-06 01:08:46 -08:00
|
|
|
import { MessageEventBus } from '@/eventbus/MessageEventBus/MessageEventBus';
|
2023-10-25 07:35:22 -07:00
|
|
|
import { Logger } from '@/Logger';
|
2024-06-11 00:11:39 -07:00
|
|
|
import { EventMessageTypeNames } from 'n8n-workflow';
|
2023-08-04 11:51:07 -07:00
|
|
|
|
|
|
|
/**
 * Registers Prometheus metrics with `prom-client` and serves them from the
 * `/metrics` route of an Express application.
 *
 * Each metric group is opt-in through an `endpoints.metrics.*` config value
 * that is read by the corresponding `setup*` method.
 */
@Service()
export class PrometheusMetricsService extends EventEmitter {
	constructor(
		private readonly logger: Logger,
		private readonly cacheService: CacheService,
		private readonly eventBus: MessageEventBus,
	) {
		super();
	}

	/**
	 * Counters created by this service. Cache counters are stored under fixed
	 * keys (`cacheHitsTotal`, `cacheMissesTotal`, `cacheUpdatesTotal`); event
	 * counters are stored under the event name. `null` marks an event name whose
	 * derived metric name failed validation.
	 */
	counters: Record<string, Counter<string> | null> = {};

	/**
	 * Clears the prom-client registry, sets up every enabled metric group and
	 * mounts the `/metrics` endpoint on `app`.
	 *
	 * NOTE(review): each call registers the cache and event-bus listeners again,
	 * so calling this more than once on the same instance presumably counts
	 * every event multiple times — confirm whether repeated calls are expected.
	 *
	 * @param app Express application that receives the middleware and route.
	 */
	async configureMetrics(app: express.Application) {
		promClient.register.clear(); // clear all metrics in case we call this a second time
		// Counters cached from a previous call are no longer registered after
		// the clear above, so drop them and let them be re-created on demand.
		this.counters = {};

		this.setupDefaultMetrics();
		this.setupN8nVersionMetric();
		this.setupCacheMetrics();
		this.setupMessageEventBusMetrics();
		this.setupApiMetrics(app);
		this.mountMetricsEndpoint(app);
	}

	/**
	 * Registers a `<prefix>version_info` gauge set to 1 and labelled with the
	 * parsed n8n version. Nothing is registered when the version cannot be parsed.
	 */
	private setupN8nVersionMetric() {
		const n8nVersion = semverParse(N8N_VERSION || '0.0.0');
		if (!n8nVersion) return;

		const versionGauge = new promClient.Gauge({
			name: config.getEnv('endpoints.metrics.prefix') + 'version_info',
			help: 'n8n version info.',
			labelNames: ['version', 'major', 'minor', 'patch'],
		});

		const { version, major, minor, patch } = n8nVersion;
		versionGauge.set({ version: 'v' + version, major, minor, patch }, 1);
	}

	/** Starts prom-client's default metrics collection when enabled in config. */
	private setupDefaultMetrics() {
		if (!config.getEnv('endpoints.metrics.includeDefaultMetrics')) return;

		promClient.collectDefaultMetrics();
	}

	/**
	 * Mounts the `express-prom-bundle` middleware on the REST, webhook,
	 * test-webhook and public API path prefixes when enabled in config.
	 *
	 * @param app Express application that receives the middleware.
	 */
	private setupApiMetrics(app: express.Application) {
		if (!config.getEnv('endpoints.metrics.includeApiEndpoints')) return;

		const metricsMiddleware = promBundle({
			autoregister: false,
			includeUp: false,
			includePath: config.getEnv('endpoints.metrics.includeApiPathLabel'),
			includeMethod: config.getEnv('endpoints.metrics.includeApiMethodLabel'),
			includeStatusCode: config.getEnv('endpoints.metrics.includeApiStatusCodeLabel'),
		});

		// Every prefix needs its leading slash: request paths always start with
		// `/`, so a prefix without one never matches.
		app.use(['/rest/', '/webhook/', '/webhook-test/', '/api/'], metricsMiddleware);
	}

	/**
	 * Adds a `GET /metrics` route that responds with the current contents of
	 * the prom-client registry.
	 *
	 * @param app Express application that receives the route.
	 */
	mountMetricsEndpoint(app: express.Application) {
		app.get('/metrics', async (_req: express.Request, res: express.Response) => {
			const metrics = await promClient.register.metrics();
			res.setHeader('Content-Type', promClient.register.contentType);
			res.send(metrics).end();
		});
	}

	/**
	 * Registers hit, miss and update counters for the cache (when enabled in
	 * config) and increments them on the matching `metrics.cache.*` events
	 * emitted by the cache service.
	 */
	private setupCacheMetrics() {
		if (!config.getEnv('endpoints.metrics.includeCacheMetrics')) {
			return;
		}

		const prefix = config.getEnv('endpoints.metrics.prefix');

		this.counters.cacheHitsTotal = new promClient.Counter({
			name: prefix + 'cache_hits_total',
			help: 'Total number of cache hits.',
			labelNames: ['cache'],
		});
		// Start at 0 so the series is reported before the first event arrives.
		this.counters.cacheHitsTotal.inc(0);
		this.cacheService.on('metrics.cache.hit', (amount: number = 1) => {
			this.counters.cacheHitsTotal?.inc(amount);
		});

		this.counters.cacheMissesTotal = new promClient.Counter({
			name: prefix + 'cache_misses_total',
			help: 'Total number of cache misses.',
			labelNames: ['cache'],
		});
		this.counters.cacheMissesTotal.inc(0);
		this.cacheService.on('metrics.cache.miss', (amount: number = 1) => {
			this.counters.cacheMissesTotal?.inc(amount);
		});

		this.counters.cacheUpdatesTotal = new promClient.Counter({
			name: prefix + 'cache_updates_total',
			help: 'Total number of cache updates.',
			labelNames: ['cache'],
		});
		this.counters.cacheUpdatesTotal.inc(0);
		this.cacheService.on('metrics.cache.update', (amount: number = 1) => {
			this.counters.cacheUpdatesTotal?.inc(amount);
		});
	}

	/**
	 * Returns the counter for the event's name, creating and caching it on
	 * first use. The metric name is the configured prefix, followed by the event
	 * name without its first `n8n.` and with dots turned into underscores,
	 * followed by `_total`.
	 *
	 * @param event Event whose name and labels define the counter.
	 * @returns The counter, or `null` when the derived metric name is invalid.
	 */
	private getCounterForEvent(event: EventMessageTypes): Counter<string> | null {
		const { eventName } = event;

		const existing = this.counters[eventName];
		if (existing) return existing;

		const prefix = config.getEnv('endpoints.metrics.prefix');
		const metricName = prefix + eventName.replace('n8n.', '').replace(/\./g, '_') + '_total';

		if (!promClient.validateMetricName(metricName)) {
			this.logger.debug(`Invalid metric name: ${metricName}. Ignoring it!`);
			this.counters[eventName] = null;
			return null;
		}

		// No initial `inc(0)` here: on a counter with label names it would add
		// an extra series without labels, and the event listener increments the
		// counter right after it is created anyway.
		const counter = new promClient.Counter({
			name: metricName,
			help: `Total number of ${eventName} events.`,
			labelNames: Object.keys(this.getLabelsForEvent(event)),
		});
		this.counters[eventName] = counter;

		return counter;
	}

	/**
	 * Subscribes to `metrics.messageEventBus.Event` on the event bus (when
	 * enabled in config) and increments the per-event counter for each event.
	 */
	private setupMessageEventBusMetrics() {
		if (!config.getEnv('endpoints.metrics.includeMessageEventBusMetrics')) {
			return;
		}

		this.eventBus.on('metrics.messageEventBus.Event', (event: EventMessageTypes) => {
			const counter = this.getCounterForEvent(event);
			if (!counter) return;

			// Pass the label values along; the counter was created with the
			// matching label names, which would otherwise never be populated.
			counter.inc(this.getLabelsForEvent(event), 1);
		});
	}

	/**
	 * Builds the label set for an event. Depending on the event type and the
	 * `endpoints.metrics.include*Label` config values, this is a
	 * `credential_type`, `workflow_id` or `node_type` label, or no label at all.
	 * A missing payload value is reported as `unknown`.
	 *
	 * @param event Event to derive labels from.
	 * @returns Label names mapped to label values; empty when no label applies.
	 */
	getLabelsForEvent(event: EventMessageTypes): Record<string, string> {
		switch (event.__type) {
			case EventMessageTypeNames.audit:
				if (event.eventName.startsWith('n8n.audit.user.credentials')) {
					return config.getEnv('endpoints.metrics.includeCredentialTypeLabel')
						? {
								credential_type: this.getLabelValueForCredential(
									event.payload.credentialType ?? 'unknown',
								),
							}
						: {};
				}

				if (event.eventName.startsWith('n8n.audit.workflow')) {
					return config.getEnv('endpoints.metrics.includeWorkflowIdLabel')
						? { workflow_id: event.payload.workflowId?.toString() ?? 'unknown' }
						: {};
				}
				// Other audit events carry no labels.
				break;

			case EventMessageTypeNames.node:
				return config.getEnv('endpoints.metrics.includeNodeTypeLabel')
					? { node_type: this.getLabelValueForNode(event.payload.nodeType ?? 'unknown') }
					: {};

			case EventMessageTypeNames.workflow:
				return config.getEnv('endpoints.metrics.includeWorkflowIdLabel')
					? { workflow_id: event.payload.workflowId?.toString() ?? 'unknown' }
					: {};
		}

		return {};
	}

	/**
	 * Converts a node type into a label value by removing the first
	 * `n8n-nodes-` and replacing every dot with an underscore.
	 */
	getLabelValueForNode(nodeType: string) {
		return nodeType.replace('n8n-nodes-', '').replace(/\./g, '_');
	}

	/** Converts a credential type into a label value by replacing every dot with an underscore. */
	getLabelValueForCredential(credentialType: string) {
		return credentialType.replace(/\./g, '_');
	}
}
|