From a5908bf82b25e1232ac59c83cd43ffe337664669 Mon Sep 17 00:00:00 2001 From: Johannes 'fish' Ziemke Date: Sat, 3 Apr 2021 12:40:22 +0200 Subject: [PATCH] Make interval configurable Signed-off-by: Johannes 'fish' Ziemke --- docs/node-mixin/config.libsonnet | 2 ++ docs/node-mixin/dashboards/node.libsonnet | 12 ++++---- docs/node-mixin/dashboards/use.libsonnet | 36 +++++++++++------------ docs/node-mixin/rules/rules.libsonnet | 32 ++++++++++---------- 4 files changed, 42 insertions(+), 40 deletions(-) diff --git a/docs/node-mixin/config.libsonnet b/docs/node-mixin/config.libsonnet index 47e741ef..a65f6f7a 100644 --- a/docs/node-mixin/config.libsonnet +++ b/docs/node-mixin/config.libsonnet @@ -53,5 +53,7 @@ fsSpaceAvailableWarningThreshold: 3, grafana_prefix: '', + + rateInterval: '5m', }, } diff --git a/docs/node-mixin/dashboards/node.libsonnet b/docs/node-mixin/dashboards/node.libsonnet index 5cec0fab..60c9ab8d 100644 --- a/docs/node-mixin/dashboards/node.libsonnet +++ b/docs/node-mixin/dashboards/node.libsonnet @@ -30,7 +30,7 @@ local gauge = promgrafonnet.gauge; ||| % $._config, legendFormat='{{cpu}}', intervalFactor=5, - interval='5m', + interval='$__rate_interval', )); local systemLoad = @@ -101,17 +101,17 @@ local gauge = promgrafonnet.gauge; .addTarget(prometheus.target( 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config, legendFormat='{{device}} read', - interval='5m', + interval='$__rate_interval', )) .addTarget(prometheus.target( 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config, legendFormat='{{device}} written', - interval='5m', + interval='$__rate_interval', )) .addTarget(prometheus.target( 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config, legendFormat='{{device}} io time', - interval='5m', + interval='$__rate_interval', )) + { seriesOverrides: [ @@ -188,7 +188,7 @@ local gauge = promgrafonnet.gauge; .addTarget(prometheus.target( 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config, legendFormat='{{device}}', - interval='5m', + interval='$__rate_interval', )); local networkTransmitted = @@ -203,7 +203,7 @@ local gauge = promgrafonnet.gauge; .addTarget(prometheus.target( 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config, legendFormat='{{device}}', - interval='5m', + interval='$__rate_interval', )); dashboard.new('Nodes', time_from='now-1h') diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index 76eeed6e..83f2d556 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -12,7 +12,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.panel('CPU Utilisation') + g.queryPanel(||| ( - instance:node_cpu_utilisation:rate5m{%(nodeExporterSelector)s} + instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s} * instance:node_num_cpu:sum{%(nodeExporterSelector)s} ) @@ -47,7 +47,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson ) .addPanel( g.panel('Memory Saturation (Major Page Faults)') + - g.queryPanel('instance:node_vmstat_pgmajfault:rate5m{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) + + g.queryPanel('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) + g.stack + { yaxes: g.yaxes('rps') }, ) @@ -58,8 +58,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.panel('Net Utilisation (Bytes Receive/Transmit)') + g.queryPanel( [ - 'instance:node_network_receive_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config, - 'instance:node_network_transmit_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config, + 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config, + 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config, ], ['{{instance}} Receive', '{{instance}} Transmit'], legendLink, @@ -84,8 +84,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.panel('Net Saturation (Drops Receive/Transmit)') + g.queryPanel( [ - 'instance:node_network_receive_drop_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config, - 'instance:node_network_transmit_drop_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config, + 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config, + 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config, ], ['{{instance}} Receive', '{{instance}} Transmit'], legendLink, @@ -116,8 +116,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson // TODO: Does the partition by device make sense? Using the most utilized device per // instance might make more sense. g.queryPanel(||| - instance_device:node_disk_io_time_seconds:rate5m{%(nodeExporterSelector)s} - / scalar(count(instance_device:node_disk_io_time_seconds:rate5m{%(nodeExporterSelector)s})) + instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) ||| % $._config, '{{instance}} {{device}}', legendLink) + g.stack + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, @@ -125,8 +125,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson .addPanel( g.panel('Disk IO Saturation') + g.queryPanel(||| - instance_device:node_disk_io_time_weighted_seconds:rate5m{%(nodeExporterSelector)s} - / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{%(nodeExporterSelector)s})) + instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) ||| % $._config, '{{instance}} {{device}}', legendLink) + g.stack + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, @@ -156,7 +156,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.row('CPU') .addPanel( g.panel('CPU Utilisation') + - g.queryPanel('instance:node_cpu_utilisation:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') + + g.queryPanel('instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') + { yaxes: g.yaxes('percentunit'), legend+: { show: false }, @@ -182,7 +182,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson ) .addPanel( g.panel('Memory Saturation (Major Page Faults)') + - g.queryPanel('instance:node_vmstat_pgmajfault:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Major page faults') + + g.queryPanel('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Major page faults') + { yaxes: g.yaxes('short'), legend+: { show: false }, @@ -195,8 +195,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.panel('Net Utilisation (Bytes Receive/Transmit)') + g.queryPanel( [ - 'instance:node_network_receive_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, - 'instance:node_network_transmit_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, + 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, + 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, ], ['Receive', 'Transmit'], ) + @@ -219,8 +219,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.panel('Net Saturation (Drops Receive/Transmit)') + g.queryPanel( [ - 'instance:node_network_receive_drop_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, - 'instance:node_network_transmit_drop_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, + 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, + 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, ], ['Receive drops', 'Transmit drops'], ) + @@ -244,12 +244,12 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.row('Disk IO') .addPanel( g.panel('Disk IO Utilisation') + - g.queryPanel('instance_device:node_disk_io_time_seconds:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') + + g.queryPanel('instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') + { yaxes: g.yaxes('percentunit') }, ) .addPanel( g.panel('Disk IO Saturation') + - g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') + + g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') + { yaxes: g.yaxes('percentunit') }, ) ) diff --git a/docs/node-mixin/rules/rules.libsonnet b/docs/node-mixin/rules/rules.libsonnet index dd9899af..5367ac3f 100644 --- a/docs/node-mixin/rules/rules.libsonnet +++ b/docs/node-mixin/rules/rules.libsonnet @@ -17,10 +17,10 @@ }, { // CPU utilisation is % CPU is not idle. - record: 'instance:node_cpu_utilisation:rate5m', + record: 'instance:node_cpu_utilisation:rate%(rateInterval)s' % $._config, expr: ||| 1 - avg without (cpu, mode) ( - rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle"}[5m]) + rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle"}[%(rateInterval)s]) ) ||| % $._config, }, @@ -50,55 +50,55 @@ ||| % $._config, }, { - record: 'instance:node_vmstat_pgmajfault:rate5m', + record: 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s' % $._config, expr: ||| - rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[5m]) + rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[%(rateInterval)s]) ||| % $._config, }, { // Disk utilisation (seconds spent, 1 second rate). - record: 'instance_device:node_disk_io_time_seconds:rate5m', + record: 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s' % $._config, expr: ||| - rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[5m]) + rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[%(rateInterval)s]) ||| % $._config, }, { // Disk saturation (weighted seconds spent, 1 second rate). - record: 'instance_device:node_disk_io_time_weighted_seconds:rate5m', + record: 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s' % $._config, expr: ||| - rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[5m]) + rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[%(rateInterval)s]) ||| % $._config, }, { - record: 'instance:node_network_receive_bytes_excluding_lo:rate5m', + record: 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s' % $._config, expr: ||| sum without (device) ( - rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, device!="lo"}[5m]) + rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, device!="lo"}[%(rateInterval)s]) ) ||| % $._config, }, { - record: 'instance:node_network_transmit_bytes_excluding_lo:rate5m', + record: 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s' % $._config, expr: ||| sum without (device) ( - rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, device!="lo"}[5m]) + rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, device!="lo"}[%(rateInterval)s]) ) ||| % $._config, }, // TODO: Find out if those drops ever happen on modern switched networks. { - record: 'instance:node_network_receive_drop_excluding_lo:rate5m', + record: 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s' % $._config, expr: ||| sum without (device) ( - rate(node_network_receive_drop_total{%(nodeExporterSelector)s, device!="lo"}[5m]) + rate(node_network_receive_drop_total{%(nodeExporterSelector)s, device!="lo"}[%(rateInterval)s]) ) ||| % $._config, }, { - record: 'instance:node_network_transmit_drop_excluding_lo:rate5m', + record: 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s' % $._config, expr: ||| sum without (device) ( - rate(node_network_transmit_drop_total{%(nodeExporterSelector)s, device!="lo"}[5m]) + rate(node_network_transmit_drop_total{%(nodeExporterSelector)s, device!="lo"}[%(rateInterval)s]) ) ||| % $._config, },