node-mixin: Improve nodes dashboard (#1448)

* node-mixin: Improve nodes dashboard

- Use stacking where it makes sense.
- Normalize idle CPU so that stacking is more meaningful.
- Consistently fill where stacking is used but don't fill where not.
- Fix y axis max value for Idle CPU panel.
- Fix y axis min value for memory usage panel.
- Use `$__interval` for range where applicable (and set min step
  to 1m).
- Make the right Y axis for disk I/O actually work.

This is just an incremental improvement. It doesn't touch the more
involved TODOs.

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
Björn Rabenstein 2019-08-15 00:40:51 +02:00 committed by GitHub
parent 0d3a2d3209
commit 7ef6f2576d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -16,25 +16,30 @@ local gauge = promgrafonnet.gauge;
datasource='$datasource', datasource='$datasource',
span=6, span=6,
format='percentunit', format='percentunit',
max=100, max=1,
min=0, min=0,
stack=true,
) )
.addTarget(prometheus.target( .addTarget(prometheus.target(
// TODO: Consider using `${__interval}` as range and a 1m min step.
||| |||
1 - rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}[1m]) (
(1 - rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}[$__interval]))
/ ignoring(cpu) group_left
count without (cpu)( node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"})
)
||| % $._config, ||| % $._config,
legendFormat='{{cpu}}', legendFormat='{{cpu}}',
intervalFactor=10, intervalFactor=5,
interval='1m',
)); ));
// TODO: Is this panel useful?
local systemLoad = local systemLoad =
graphPanel.new( graphPanel.new(
'Load Average', 'Load Average',
datasource='$datasource', datasource='$datasource',
span=6, span=6,
format='short', format='short',
fill=0,
) )
.addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='1m load average')) .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='1m load average'))
.addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='5m load average')) .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='5m load average'))
@ -46,6 +51,8 @@ local gauge = promgrafonnet.gauge;
datasource='$datasource', datasource='$datasource',
span=9, span=9,
format='bytes', format='bytes',
stack=true,
min=0,
) )
.addTarget(prometheus.target( .addTarget(prometheus.target(
||| |||
@ -84,20 +91,32 @@ local gauge = promgrafonnet.gauge;
'Disk I/O', 'Disk I/O',
datasource='$datasource', datasource='$datasource',
span=9, span=9,
fill=0,
) )
// TODO: Does it make sense to have those three in the same panel? // TODO: Does it make sense to have those three in the same panel?
// TODO: Consider using `${__interval}` as range and a 1m min step. .addTarget(prometheus.target(
.addTarget(prometheus.target('rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[1m])' % $._config, legendFormat='{{device}} read')) 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config,
.addTarget(prometheus.target('rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[1m])' % $._config, legendFormat='{{device}} written')) legendFormat='{{device}} read',
.addTarget(prometheus.target('rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[1m])' % $._config, legendFormat='{{device}} io time')) + interval='1m',
))
.addTarget(prometheus.target(
'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config,
legendFormat='{{device}} written',
interval='1m',
))
.addTarget(prometheus.target(
'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config,
legendFormat='{{device}} io time',
interval='1m',
)) +
{ {
seriesOverrides: [ seriesOverrides: [
{ {
alias: 'read', alias: '/ read| written/',
yaxis: 1, yaxis: 1,
}, },
{ {
alias: 'io time', alias: '/ io time/',
yaxis: 2, yaxis: 2,
}, },
], ],
@ -129,9 +148,13 @@ local gauge = promgrafonnet.gauge;
datasource='$datasource', datasource='$datasource',
span=6, span=6,
format='bytes', format='bytes',
fill=0,
) )
// TODO: Consider using `${__interval}` as range and a 1m min step. .addTarget(prometheus.target(
.addTarget(prometheus.target('rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[1m])' % $._config, legendFormat='{{device}}')); 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config,
legendFormat='{{device}}',
interval='1m',
));
local networkTransmitted = local networkTransmitted =
graphPanel.new( graphPanel.new(
@ -139,9 +162,13 @@ local gauge = promgrafonnet.gauge;
datasource='$datasource', datasource='$datasource',
span=6, span=6,
format='bytes', format='bytes',
fill=0,
) )
// TODO: Consider using `${__interval}` as range and a 1m min step. .addTarget(prometheus.target(
.addTarget(prometheus.target('rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[1m])' % $._config, legendFormat='{{device}}')); 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config,
legendFormat='{{device}}',
interval='1m',
));
dashboard.new('Nodes', time_from='now-1h') dashboard.new('Nodes', time_from='now-1h')
.addTemplate( .addTemplate(