docs/node-mixin: Improve memory pressure rule

The `instance:node_memory_swap_io_pages:rate1m` rule was intended to
measure the amount of memory pressure a system is under, but its name is
misleading: it refers specifically to swap, whereas the rule is computed
from the rates of `node_vmstat_pgpgin` and `node_vmstat_pgpgout`. The
rate of `node_vmstat_pgmajfault` is a better metric for memory pressure
(see #1524).

This commit renames `instance:node_memory_swap_io_pages:rate1m` to
`instance:node_vmstat_pgmajfault:rate1m`, and defines it as
`rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[1m])`. The
dashboards are updated accordingly.

Signed-off-by: Benoît Knecht <benoit.knecht@fsfe.org>
This commit is contained in:
Benoît Knecht 2019-10-24 18:13:36 +02:00
parent 74a90e81c0
commit 5a7b85876d
2 changed files with 6 additions and 10 deletions

View file

@ -53,8 +53,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
)
.addPanel(
g.panel('Memory Saturation (Swapped Pages)') +
g.queryPanel('instance:node_memory_swap_io_pages:rate1m{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) +
g.panel('Memory Saturation (Major Page Faults)') +
g.queryPanel('instance:node_vmstat_pgmajfault:rate1m{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) +
g.stack +
{ yaxes: g.yaxes('rps') },
)
@ -201,8 +201,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
{ yaxes: g.yaxes('percentunit') },
)
.addPanel(
g.panel('Memory Saturation (pages swapped per second)') +
g.queryPanel('instance:node_memory_swap_io_pages:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Swap IO') +
g.panel('Memory Saturation (Major Page Faults)') +
g.queryPanel('instance:node_vmstat_pgmajfault:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Major page faults') +
{
yaxes: g.yaxes('short'),
legend+: { show: false },

View file

@ -50,13 +50,9 @@
||| % $._config,
},
{
record: 'instance:node_memory_swap_io_pages:rate1m',
record: 'instance:node_vmstat_pgmajfault:rate1m',
expr: |||
(
rate(node_vmstat_pgpgin{%(nodeExporterSelector)s}[1m])
+
rate(node_vmstat_pgpgout{%(nodeExporterSelector)s}[1m])
)
rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[1m])
||| % $._config,
},
{