Merge pull request #9700 from nikosmeds/nikosmeds/hagroupcrashlooping-mixin-60m

Increase time range for PrometheusHAGroupCrashlooping alert
2025-03-05 20:59:13 -08:00 · 2021-11-19 12:53:55 +01:00 · 2021-11-19 12:53:55 +01:00 · 2234798f60
parent 0bfc542a43 53ca693f9e
commit 2234798f60
1 changed file with 2 additions and 2 deletions
--- a/documentation/prometheus-mixin/alerts.libsonnet
+++ b/documentation/prometheus-mixin/alerts.libsonnet
@@ -391,7 +391,7 @@
                and
                  ( 
                    count by (%(prometheusHAGroupLabels)s) (
-                      changes(process_start_time_seconds{%(prometheusSelector)s}[30m]) > 1
+                      changes(process_start_time_seconds{%(prometheusSelector)s}[1h]) > 1
                    ) 
                    / 
                    count by (%(prometheusHAGroupLabels)s) (
@@ -418,7 +418,7 @@
            },
            annotations: {
              summary: 'More than half of the Prometheus instances within the same HA group are crashlooping.',
-              description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have had at least 5 total restarts or 2 unclean restarts in the last 30m.' % $._config,
+              description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have had at least 5 total restarts in the last 30m or 2 unclean restarts in the last 1h.' % $._config,
            },
          },
        ],