mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-26 14:09:41 -08:00
Prometheus monitoring mixin for Prometheus itself.
Signed-off-by: Tom Wilkie <tom.wilkie@gmail.com>
This commit is contained in:
parent
11b336e3ca
commit
ee1427faad
92
documentation/prometheus-mixin/alerts.libsonnet
Normal file
92
documentation/prometheus-mixin/alerts.libsonnet
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
{
|
||||||
|
prometheusAlerts+:: {
|
||||||
|
groups+: [
|
||||||
|
{
|
||||||
|
name: 'prometheus',
|
||||||
|
rules: [
|
||||||
|
{
|
||||||
|
alert: 'PromScrapeFailed',
|
||||||
|
expr: |||
|
||||||
|
up != 1
|
||||||
|
|||,
|
||||||
|
'for': '15m',
|
||||||
|
labels: {
|
||||||
|
severity: 'warning',
|
||||||
|
},
|
||||||
|
annotations: {
|
||||||
|
message: 'Prometheus failed to scrape a target {{ $labels.job }} / {{ $labels.instance }}',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alert: 'PromBadConfig',
|
||||||
|
expr: |||
|
||||||
|
prometheus_config_last_reload_successful{%(prometheusSelector)s} == 0
|
||||||
|
||| % $._config,
|
||||||
|
'for': '15m',
|
||||||
|
labels: {
|
||||||
|
severity: 'critical',
|
||||||
|
},
|
||||||
|
annotations: {
|
||||||
|
mesage: 'Prometheus failed to reload config, see container logs',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alert: 'PromAlertmanagerBadConfig',
|
||||||
|
expr: |||
|
||||||
|
alertmanager_config_last_reload_successful{%(alertmanagerSelector)s} == 0
|
||||||
|
||| % $._config,
|
||||||
|
'for': '10m',
|
||||||
|
labels: {
|
||||||
|
severity: 'critical',
|
||||||
|
},
|
||||||
|
annotations: {
|
||||||
|
message: 'Alertmanager failed to reload config, see container logs',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alert: 'PromAlertsFailed',
|
||||||
|
expr: |||
|
||||||
|
sum(increase(alertmanager_notifications_failed_total{%(alertmanagerSelector)s}[5m])) by (namespace) > 0
|
||||||
|
||| % $._config,
|
||||||
|
'for': '5m',
|
||||||
|
labels: {
|
||||||
|
severity: 'critical',
|
||||||
|
},
|
||||||
|
annotations: {
|
||||||
|
message: 'Alertmanager failed to send an alert.',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alert: 'PromRemoteStorageFailures',
|
||||||
|
expr: |||
|
||||||
|
(rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[1m]) * 100)
|
||||||
|
/
|
||||||
|
(rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[1m]) + rate(prometheus_remote_storage_succeeded_samples_total{%(prometheusSelector)s}[1m]))
|
||||||
|
> 1
|
||||||
|
||| % $._config,
|
||||||
|
'for': '15m',
|
||||||
|
labels: {
|
||||||
|
severity: 'critical',
|
||||||
|
},
|
||||||
|
annotations: {
|
||||||
|
message: 'Prometheus failed to send {{ printf "%.1f" $value }}% samples',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alert: 'PromRuleFailures',
|
||||||
|
'for': '15m',
|
||||||
|
expr: |||
|
||||||
|
rate(prometheus_rule_evaluation_failures_total{%(prometheusSelector)s}[1m]) > 0
|
||||||
|
||| % $._config,
|
||||||
|
labels: {
|
||||||
|
severity: 'critical',
|
||||||
|
},
|
||||||
|
annotations: {
|
||||||
|
message: 'Prometheus failed to evaluate {{ printf "%.1f" $value }} rules / s',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
}
|
7
documentation/prometheus-mixin/config.libsonnet
Normal file
7
documentation/prometheus-mixin/config.libsonnet
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
{
|
||||||
|
_config+:: {
|
||||||
|
// Selectors are inserted between {} in Prometheus queries.
|
||||||
|
prometheusSelector: 'job="prometheus"',
|
||||||
|
alertmanagerSelector: 'job="alertmanager"',
|
||||||
|
},
|
||||||
|
}
|
2
documentation/prometheus-mixin/mixin.libsonnet
Normal file
2
documentation/prometheus-mixin/mixin.libsonnet
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
(import 'config.libsonnet') +
|
||||||
|
(import 'alerts.libsonnet')
|
Loading…
Reference in a new issue