Merge pull request #9700 from nikosmeds/nikosmeds/hagroupcrashlooping-mixin-60m

Increase time range for PrometheusHAGroupCrashlooping alert
This commit is contained in:
Björn Rabenstein 2021-11-19 12:53:55 +01:00 committed by GitHub
commit 2234798f60
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -391,7 +391,7 @@
and and
( (
count by (%(prometheusHAGroupLabels)s) ( count by (%(prometheusHAGroupLabels)s) (
changes(process_start_time_seconds{%(prometheusSelector)s}[30m]) > 1 changes(process_start_time_seconds{%(prometheusSelector)s}[1h]) > 1
) )
/ /
count by (%(prometheusHAGroupLabels)s) ( count by (%(prometheusHAGroupLabels)s) (
@@ -418,7 +418,7 @@
}, },
annotations: { annotations: {
summary: 'More than half of the Prometheus instances within the same HA group are crashlooping.', summary: 'More than half of the Prometheus instances within the same HA group are crashlooping.',
description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have had at least 5 total restarts or 2 unclean restarts in the last 30m.' % $._config, description: '{{ $value | humanizePercentage }} of Prometheus instances within the %(prometheusHAGroupName)s HA group have had at least 5 total restarts in the last 30m or 2 unclean restarts in the last 1h.' % $._config,
}, },
}, },
], ],