From a016d9cd6f43ba8be47f624de95113f897e1b9f9 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Thu, 15 Aug 2019 16:32:54 +0200 Subject: [PATCH 1/3] node-mixin: Improve disk usage panel - Use a stacked graph instead of a gauge as development over time is especially useful for disk space usage. - By only taking one metric per device into account, we avoid double-counting for devices that are mounted multiple times. Signed-off-by: beorn7 --- docs/node-mixin/dashboards/node.libsonnet | 66 +++++++++++++++++------ 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/docs/node-mixin/dashboards/node.libsonnet b/docs/node-mixin/dashboards/node.libsonnet index 8e279c87..8e29b386 100644 --- a/docs/node-mixin/dashboards/node.libsonnet +++ b/docs/node-mixin/dashboards/node.libsonnet @@ -39,6 +39,7 @@ local gauge = promgrafonnet.gauge; datasource='$datasource', span=6, format='short', + min=0, fill=0, ) .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='1m load average')) @@ -90,7 +91,8 @@ local gauge = promgrafonnet.gauge; graphPanel.new( 'Disk I/O', datasource='$datasource', - span=9, + span=6, + min=0, fill=0, ) // TODO: Does it make sense to have those three in the same panel? @@ -126,21 +128,51 @@ local gauge = promgrafonnet.gauge; ], }; - // TODO: It would be nicer to have a gauge that gets a 0-1 range and displays it as a percentage 0%-100%. - // This needs to be added upstream in the promgrafonnet library and then changed here. - // TODO: Should this be partitioned by mountpoint? - local diskSpaceUsage = gauge.new( - 'Disk Space Usage', - ||| - 100 - - ( - sum(node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s}) - / - sum(node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s}) - * 100 - ) - ||| % $._config, - ).withLowerBeingBetter(); + // TODO: Somehow partition this by device while excluding read-only devices. + local diskSpaceUsage = + graphPanel.new( + 'Disk Space Usage', + datasource='$datasource', + span=6, + format='bytes', + min=0, + fill=1, + stack=true, + ) + .addTarget(prometheus.target( + ||| + sum( + max by (device) ( + node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s} + - + node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s} + ) + ) + ||| % $._config, + legendFormat='used', + )) + .addTarget(prometheus.target( + ||| + sum( + max by (device) ( + node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s} + ) + ) + ||| % $._config, + legendFormat='available', + )) + + { + seriesOverrides: [ + { + alias: 'used', + color: '#E0B400', + }, + { + alias: 'available', + color: '#73BF69', + }, + ], + }; local networkReceived = graphPanel.new( @@ -148,6 +180,7 @@ local gauge = promgrafonnet.gauge; datasource='$datasource', span=6, format='bytes', + min=0, fill=0, ) .addTarget(prometheus.target( @@ -162,6 +195,7 @@ local gauge = promgrafonnet.gauge; datasource='$datasource', span=6, format='bytes', + min=0, fill=0, ) .addTarget(prometheus.target( From 024d5ed55e57ff0564b7293c8e7accaf2371c5b6 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Thu, 15 Aug 2019 16:36:10 +0200 Subject: [PATCH 2/3] Fix title of CPU panel to usage We use the `mode="idle"` metric, but we are inverting it, so this is usage, and that's intended. Signed-off-by: beorn7 --- docs/node-mixin/dashboards/node.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/node-mixin/dashboards/node.libsonnet b/docs/node-mixin/dashboards/node.libsonnet index 8e29b386..36b330fc 100644 --- a/docs/node-mixin/dashboards/node.libsonnet +++ b/docs/node-mixin/dashboards/node.libsonnet @@ -12,7 +12,7 @@ local gauge = promgrafonnet.gauge; 'nodes.json': local idleCPU = graphPanel.new( - 'Idle CPU', + 'CPU Usage', datasource='$datasource', span=6, format='percentunit', From 44e5731de7c021abfd0935c087518364c959bd99 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Thu, 15 Aug 2019 16:43:57 +0200 Subject: [PATCH 3/3] Add line for number of cores to load graph Backported from the node dashboard in the kubernetes-mixin. Signed-off-by: beorn7 --- docs/node-mixin/dashboards/node.libsonnet | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/node-mixin/dashboards/node.libsonnet b/docs/node-mixin/dashboards/node.libsonnet index 36b330fc..78241ed9 100644 --- a/docs/node-mixin/dashboards/node.libsonnet +++ b/docs/node-mixin/dashboards/node.libsonnet @@ -44,7 +44,8 @@ local gauge = promgrafonnet.gauge; ) .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='1m load average')) .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='5m load average')) - .addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='15m load average')); + .addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='15m load average')) + .addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", mode="idle"})' % $._config, legendFormat='logical cores')); local memoryGraph = graphPanel.new(