Alert if more than 1% of alerts fail for a given integration.

Signed-off-by: Tom Wilkie <tom.wilkie@gmail.com>
This commit is contained in:
Tom Wilkie 2018-08-07 14:18:33 +02:00
parent 266ba185fe
commit 50861d586a

View file

@@ -33,14 +33,14 @@
{ {
alert: 'PromAlertsFailed', alert: 'PromAlertsFailed',
expr: ||| expr: |||
sum(increase(alertmanager_notifications_failed_total{%(alertmanagerSelector)s}[5m])) by (namespace) > 0 100 * rate(alertmanager_notifications_failed_total{%(alertmanagerSelector)s}[5m]) / rate(alertmanager_notifications_total{%(alertmanagerSelector)s}[5m]) > 1
||| % $._config, ||| % $._config,
'for': '5m', 'for': '5m',
labels: { labels: {
severity: 'critical', severity: 'critical',
}, },
annotations: { annotations: {
message: 'Alertmanager failed to send an alert.', message: 'Alertmanager failed to send {{ printf "%.1f" $value }}% alerts to {{ $labels.integration }}.',
}, },
}, },
{ {