mirror of
https://github.com/prometheus/prometheus.git
synced 2025-03-05 20:59:13 -08:00
add alert for sd refresh failure (#12410)
* add alert for sd refresh failure. Due to a config error or the SD service being down, Prometheus may fail to refresh its SD resources, which may lead to scrape failures or irrelevant metrics. Signed-off-by: Leo Q <LeoQuote@users.noreply.github.com> * apply suggestions Signed-off-by: Leo Q <LeoQuote@users.noreply.github.com> --------- Signed-off-by: Leo Q <LeoQuote@users.noreply.github.com>
This commit is contained in:
parent
f5c5806f20
commit
90f6c1faba
|
@ -20,6 +20,20 @@
|
|||
description: 'Prometheus %(prometheusName)s has failed to reload its configuration.' % $._config,
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusSDRefreshFailure',
|
||||
expr: |||
|
||||
increase(prometheus_sd_refresh_failures_total{%(prometheusSelector)s}[10m]) > 0
|
||||
||| % $._config,
|
||||
'for': '20m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
summary: 'Failed Prometheus SD refresh.',
|
||||
description: 'Prometheus %(prometheusName)s has failed to refresh SD with mechanism {{$labels.mechanism}}.' % $._config,
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusNotificationQueueRunningFull',
|
||||
expr: |||
|
||||
|
|
Loading…
Reference in a new issue