2018-05-08 03:10:29 -07:00
|
|
|
{
|
|
|
|
prometheusAlerts+:: {
|
|
|
|
groups+: [
|
|
|
|
{
|
2018-08-06 01:41:18 -07:00
|
|
|
name: 'node-exporter',
|
2018-05-08 03:10:29 -07:00
|
|
|
rules: [
|
|
|
|
// Warning tier of the "space filling up" prediction.
// Fires when all three conditions hold continuously for 1h:
//   * a linear fit over the last 6h of available space reaches zero within 24h,
//   * less than 40% of the filesystem size is currently available,
//   * the filesystem is not mounted read-only.
{
  alert: 'NodeFilesystemSpaceFillingUp',
  expr: |||
    predict_linear(node_filesystem_avail{%(nodeExporterSelector)s,%(fsSelectors)s}[6h], 24*60*60) < 0
    and
    node_filesystem_avail{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_size{%(nodeExporterSelector)s,%(fsSelectors)s} < 0.4
    and
    node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
  ||| % $._config,
  'for': '1h',
  labels: { severity: 'warning' },
  annotations: {
    message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} is predicted to run out of space within the next 24 hours.',
  },
},
|
|
|
|
// Critical tier of the "space filling up" prediction.
// Same shape as the warning tier, with a shorter horizon and tighter threshold:
// the 6h linear fit reaches zero within 4h, less than 20% of the size is
// available, and the filesystem is not read-only. Must hold for 1h.
{
  alert: 'NodeFilesystemSpaceFillingUp',
  expr: |||
    predict_linear(node_filesystem_avail{%(nodeExporterSelector)s,%(fsSelectors)s}[6h], 4*60*60) < 0
    and
    node_filesystem_avail{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_size{%(nodeExporterSelector)s,%(fsSelectors)s} < 0.2
    and
    node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
  ||| % $._config,
  'for': '1h',
  labels: { severity: 'critical' },
  annotations: {
    message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} is predicted to run out of space within the next 4 hours.',
  },
},
|
|
|
|
// Warning tier of the static "out of space" check.
// Fires when less than 5% of the filesystem size has been available for 1h on
// a filesystem that is not read-only. The left-hand side is multiplied by 100,
// so {{ $value }} in the message is a percentage.
{
  alert: 'NodeFilesystemOutOfSpace',
  expr: |||
    node_filesystem_avail{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_size{%(nodeExporterSelector)s,%(fsSelectors)s} * 100 < 5
    and
    node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
  ||| % $._config,
  'for': '1h',
  labels: { severity: 'warning' },
  annotations: {
    message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ $value }}% available space left.',
  },
},
|
|
|
|
// Critical tier of the static "out of space" check.
// Fires when less than 3% of the filesystem size has been available for 1h on
// a filesystem that is not read-only. {{ $value }} is the available percentage.
{
  alert: 'NodeFilesystemOutOfSpace',
  expr: |||
    node_filesystem_avail{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_size{%(nodeExporterSelector)s,%(fsSelectors)s} * 100 < 3
    and
    node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
  ||| % $._config,
  'for': '1h',
  labels: { severity: 'critical' },
  annotations: {
    message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ $value }}% available space left.',
  },
},
|
|
|
|
// Warning tier of the "files (inodes) filling up" prediction.
// Fires when all three conditions hold continuously for 1h:
//   * a linear fit over the last 6h of node_filesystem_files_free reaches zero within 24h,
//   * fewer than 40% of node_filesystem_files are currently free,
//   * the filesystem is not mounted read-only.
{
  alert: 'NodeFilesystemFilesFillingUp',
  expr: |||
    predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelectors)s}[6h], 24*60*60) < 0
    and
    node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelectors)s} < 0.4
    and
    node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
  ||| % $._config,
  'for': '1h',
  labels: { severity: 'warning' },
  annotations: {
    message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} is predicted to run out of files within the next 24 hours.',
  },
},
|
|
|
|
// Critical tier of the "files (inodes) filling up" prediction.
// Fires when the 6h linear fit of node_filesystem_files_free reaches zero
// within 4h, fewer than 20% of node_filesystem_files are free, and the
// filesystem is not read-only. Must hold for 1h.
//
// Severity is 'critical' to match the 4h tier of NodeFilesystemSpaceFillingUp;
// with 'warning' this rule would be indistinguishable by severity from the
// 24h tier of the same alert.
{
  alert: 'NodeFilesystemFilesFillingUp',
  expr: |||
    predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelectors)s}[6h], 4*60*60) < 0
    and
    node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelectors)s} < 0.2
    and
    node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
  ||| % $._config,
  'for': '1h',
  labels: {
    severity: 'critical',
  },
  annotations: {
    message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} is predicted to run out of files within the next 4 hours.',
  },
},
|
|
|
|
// Warning tier of the static "out of files (inodes)" check.
// Fires when fewer than 5% of node_filesystem_files have been free for 1h on a
// filesystem that is not read-only. The left-hand side is multiplied by 100,
// so {{ $value }} in the message is a percentage.
{
  alert: 'NodeFilesystemOutOfFiles',
  expr: |||
    node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelectors)s} * 100 < 5
    and
    node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
  ||| % $._config,
  'for': '1h',
  labels: { severity: 'warning' },
  annotations: {
    message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ $value }}% available inodes left.',
  },
},
|
|
|
|
// Critical tier of the static "out of files (inodes)" check.
// Fires when fewer than 3% of node_filesystem_files have been free for 1h on a
// filesystem that is not read-only. {{ $value }} is the free-inode percentage.
//
// The expression measures inodes (node_filesystem_files_free /
// node_filesystem_files), so the alert is named NodeFilesystemOutOfFiles and
// the message reports inodes. Naming it NodeFilesystemOutOfSpace would collide
// with the real disk-space alert and misreport the cause.
{
  alert: 'NodeFilesystemOutOfFiles',
  expr: |||
    node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelectors)s} * 100 < 3
    and
    node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
  ||| % $._config,
  'for': '1h',
  labels: {
    severity: 'critical',
  },
  annotations: {
    message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ $value }}% available inodes left.',
  },
},
|
|
|
|
// Fires when an interface has accumulated more than 10 receive errors over a
// 2-minute window, and that has been true for 1h.
//
// The series is scoped with nodeExporterSelector like the other rules in this
// group. The expression is already formatted with $._config, but without a
// placeholder the substitution had no effect and the rule matched the metric
// from any job.
{
  alert: 'NodeNetworkReceiveErrs',
  expr: |||
    increase(node_network_receive_errs{%(nodeExporterSelector)s}[2m]) > 10
  ||| % $._config,
  'for': '1h',
  labels: {
    severity: 'critical',
  },
  annotations: {
    message: '{{ $labels.instance }} interface {{ $labels.device }} shows errors while receiving packets ({{ $value }} errors in two minutes).',
  },
},
|
|
|
|
// Fires when an interface has accumulated more than 10 transmit errors over a
// 2-minute window, and that has been true for 1h.
//
// The series is scoped with nodeExporterSelector like the other rules in this
// group. The expression is already formatted with $._config, but without a
// placeholder the substitution had no effect and the rule matched the metric
// from any job.
{
  alert: 'NodeNetworkTransmitErrs',
  expr: |||
    increase(node_network_transmit_errs{%(nodeExporterSelector)s}[2m]) > 10
  ||| % $._config,
  'for': '1h',
  labels: {
    severity: 'critical',
  },
  annotations: {
    message: '{{ $labels.instance }} interface {{ $labels.device }} shows errors while transmitting packets ({{ $value }} errors in two minutes).',
  },
},
|
|
|
|
],
|
|
|
|
},
|
|
|
|
],
|
|
|
|
},
|
|
|
|
}
|