mirror of
https://github.com/prometheus/node_exporter.git
synced 2025-01-03 18:07:46 -08:00
0107bc7942
* Make FS space alerts thresholds configurable (#1) This makes it possible to tweak the thresholds for the NodeFilesystemSpaceFillingUp alerts. Which might be necessary in systems like Kubernetes, where the image garbage collector runs at 85%, so it's not a problem that the disk reaches that usage %. Signed-off-by: iuri aranda <iuri@skyscrapers.eu>
192 lines
8.3 KiB
Plaintext
192 lines
8.3 KiB
Plaintext
// Prometheus alerting rules for node_exporter metrics.
//
// This object extends the hidden `prometheusAlerts` field with a single rule
// group named 'node-exporter'. Every PromQL expression is a text block that is
// formatted against `$._config`, which must provide the keys referenced below:
//   - nodeExporterSelector: label matcher(s) selecting the node_exporter targets
//   - fsSelector: label matcher(s) selecting the filesystems to watch
//   - fsSpaceFillingUpWarningThreshold / fsSpaceFillingUpCriticalThreshold:
//       integer percentages of available space (formatted with %d)
//   - nodeCriticalSeverity: severity label value used by the critical rules
{
  prometheusAlerts+:: {
    groups+: [
      {
        name: 'node-exporter',
        rules: [
          // Space: below the configurable warning threshold AND the 6h linear
          // trend predicts zero available bytes within 24h, on a filesystem
          // that is not read-only.
          {
            alert: 'NodeFilesystemSpaceFillingUp',
            expr: |||
              (
                node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceFillingUpWarningThreshold)d
              and
                predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 24*60*60) < 0
              and
                node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
              )
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'warning',
            },
            annotations: {
              summary: 'Filesystem is predicted to run out of space within the next 24 hours.',
              description: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.',
            },
          },
          // Space: below the configurable critical threshold AND the 6h linear
          // trend predicts zero available bytes within 4h.
          {
            alert: 'NodeFilesystemSpaceFillingUp',
            expr: |||
              (
                node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceFillingUpCriticalThreshold)d
              and
                predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 4*60*60) < 0
              and
                node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
              )
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: '%(nodeCriticalSeverity)s' % $._config,
            },
            annotations: {
              summary: 'Filesystem is predicted to run out of space within the next 4 hours.',
              description: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.',
            },
          },
          // Space: less than 5% available on a writable filesystem (no trend check).
          {
            alert: 'NodeFilesystemAlmostOutOfSpace',
            expr: |||
              (
                node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 5
              and
                node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
              )
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'warning',
            },
            annotations: {
              summary: 'Filesystem has less than 5% space left.',
              description: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.',
            },
          },
          // Space: less than 3% available on a writable filesystem (no trend check).
          {
            alert: 'NodeFilesystemAlmostOutOfSpace',
            expr: |||
              (
                node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 3
              and
                node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
              )
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: '%(nodeCriticalSeverity)s' % $._config,
            },
            annotations: {
              summary: 'Filesystem has less than 3% space left.',
              description: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.',
            },
          },
          // Inodes: below 40% free AND the 6h linear trend predicts zero free
          // inodes within 24h, on a filesystem that is not read-only.
          {
            alert: 'NodeFilesystemFilesFillingUp',
            expr: |||
              (
                node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 40
              and
                predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 24*60*60) < 0
              and
                node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
              )
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'warning',
            },
            annotations: {
              summary: 'Filesystem is predicted to run out of inodes within the next 24 hours.',
              description: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.',
            },
          },
          // Inodes: below 20% free AND the 6h linear trend predicts zero free
          // inodes within 4h.
          {
            alert: 'NodeFilesystemFilesFillingUp',
            expr: |||
              (
                node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 20
              and
                predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 4*60*60) < 0
              and
                node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
              )
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: '%(nodeCriticalSeverity)s' % $._config,
            },
            annotations: {
              summary: 'Filesystem is predicted to run out of inodes within the next 4 hours.',
              description: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.',
            },
          },
          // Inodes: less than 5% free on a writable filesystem (no trend check).
          {
            alert: 'NodeFilesystemAlmostOutOfFiles',
            expr: |||
              (
                node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 5
              and
                node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
              )
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'warning',
            },
            annotations: {
              summary: 'Filesystem has less than 5% inodes left.',
              description: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.',
            },
          },
          // Inodes: less than 3% free on a writable filesystem (no trend check).
          {
            alert: 'NodeFilesystemAlmostOutOfFiles',
            expr: |||
              (
                node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 3
              and
                node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
              )
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: '%(nodeCriticalSeverity)s' % $._config,
            },
            annotations: {
              summary: 'Filesystem has less than 3% inodes left.',
              description: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.',
            },
          },
          // Network: more than 10 receive errors over a 2m window.
          // The metric is scoped with nodeExporterSelector, like every other
          // rule in this group, so series from unrelated jobs that happen to
          // expose the same metric name do not trigger the alert.
          {
            alert: 'NodeNetworkReceiveErrs',
            expr: |||
              increase(node_network_receive_errs_total{%(nodeExporterSelector)s}[2m]) > 10
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'warning',
            },
            annotations: {
              summary: 'Network interface is reporting many receive errors.',
              description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.',
            },
          },
          // Network: more than 10 transmit errors over a 2m window, scoped to
          // the node_exporter targets in the same way as the receive rule.
          {
            alert: 'NodeNetworkTransmitErrs',
            expr: |||
              increase(node_network_transmit_errs_total{%(nodeExporterSelector)s}[2m]) > 10
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'warning',
            },
            annotations: {
              summary: 'Network interface is reporting many transmit errors.',
              description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.',
            },
          },
        ],
      },
    ],
  },
}