From f252c4616a9d378caf9baf0da461cc36f59bab53 Mon Sep 17 00:00:00 2001 From: v-zhuravlev Date: Sun, 16 Feb 2025 18:05:46 +0800 Subject: [PATCH] Add NodeSystemdServiceCrashlooping alert to mixin (#3039) * Add NodeSystemdServiceCrashlooping alert --------- Signed-off-by: Vitaly Zhuravlev --- docs/node-mixin/alerts/alerts.libsonnet | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/node-mixin/alerts/alerts.libsonnet b/docs/node-mixin/alerts/alerts.libsonnet index dc93c9a8..61d9dd2d 100644 --- a/docs/node-mixin/alerts/alerts.libsonnet +++ b/docs/node-mixin/alerts/alerts.libsonnet @@ -407,6 +407,20 @@ description: 'Systemd service {{ $labels.name }} has entered failed state at {{ $labels.instance }}', }, }, + { + alert: 'NodeSystemdServiceCrashlooping', + expr: ||| + increase(node_systemd_service_restart_total{%(nodeExporterSelector)s}[5m]) > 2 + ||| % $._config, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + summary: 'Systemd service keeps restaring, possibly crash looping.', + description: 'Systemd service {{ $labels.name }} has being restarted too many times at {{ $labels.instance }} for the last 15 minutes. Please check if service is crash looping.', + }, + }, { alert: 'NodeBondingDegraded', expr: |||