mixin: Add a capability to exclude non-prod AM instances

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
beorn7 2020-12-03 20:59:53 +01:00
parent f055690b34
commit 553f904f2d
2 changed files with 14 additions and 4 deletions

View file

@ -267,9 +267,9 @@
alert: 'PrometheusErrorSendingAlertsToAnyAlertmanager',
expr: |||
min without (alertmanager) (
rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m])
rate(prometheus_notifications_errors_total{%(prometheusSelector)s,alertmanager!~`%(nonNotifyingAlertmanagerRegEx)s`}[5m])
/
rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m])
rate(prometheus_notifications_sent_total{%(prometheusSelector)s,alertmanager!~`%(nonNotifyingAlertmanagerRegEx)s`}[5m])
)
* 100
> 3
@ -289,9 +289,9 @@
alert: 'PrometheusErrorSendingAlertsToAnyAlertmanager',
expr: |||
min by (%(prometheusHAGroupLabels)s) (
rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m])
rate(prometheus_notifications_errors_total{%(prometheusSelector)s,alertmanager!~`%(nonNotifyingAlertmanagerRegEx)s`}[5m])
/
rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m])
rate(prometheus_notifications_sent_total{%(prometheusSelector)s,alertmanager!~`%(nonNotifyingAlertmanagerRegEx)s`}[5m])
)
* 100
> 3

View file

@ -27,5 +27,15 @@
// HA group. All labels used here must also be present in
// prometheusHAGroupLabels above.
prometheusHAGroupName: '{{$labels.job}}',
// nonNotifyingAlertmanagerRegEx can be used to mark Alertmanager
// instances that are not part of the Alertmanager cluster
// delivering production notifications. This is important for the
// PrometheusErrorSendingAlertsToAnyAlertmanager alert. Otherwise,
// a still-working test or auditing instance could mask a full
// failure of all the production instances. The provided regular
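// expression is matched against the `alertmanager` label. As with
// every PromQL regex matcher, the match is fully anchored, so the
// expression has to cover the whole label value. With the empty
// default, no Alertmanager instance is excluded.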
// Example: @'http://test-alertmanager\..*'
nonNotifyingAlertmanagerRegEx: @'',
},
}