From d0c1d00d18867297326bc7b1c21de36ae50e6a92 Mon Sep 17 00:00:00 2001 From: Tom <12222103+critchtionary@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:49:22 +0000 Subject: [PATCH] Migrate dashboards to new grafonnet library (#3147) Migrated away from deprecated Grafonnet library. This replaces panels using Angular JS which are disabled by default in Grafana 11 and will be unsupported in Grafana 12. Fixes #3046 --------- Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com> --- docs/node-mixin/dashboards/use.libsonnet | 825 ++++++++++---------- docs/node-mixin/jsonnetfile.json | 15 +- docs/node-mixin/lib/prom-mixin.libsonnet | 925 +++++++++++------------ 3 files changed, 871 insertions(+), 894 deletions(-) diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index fce411ab..f9d9e07c 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -1,469 +1,480 @@ -local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; local dashboard = grafana.dashboard; -local row = grafana.row; -local prometheus = grafana.prometheus; -local template = grafana.template; -local graphPanel = grafana.graphPanel; +local variable = dashboard.variable; +local row = grafana.panel.row; +local prometheus = grafana.query.prometheus; + +local timeSeriesPanel = grafana.panel.timeSeries; +local tsOptions = timeSeriesPanel.options; +local tsStandardOptions = timeSeriesPanel.standardOptions; +local tsQueryOptions = timeSeriesPanel.queryOptions; +local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom; +local tsLegend = tsOptions.legend; local c = import '../config.libsonnet'; -local datasourceTemplate = { - current: { - text: 'default', - value: 'default', - }, - hide: 0, - label: 'Data Source', - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, 
- regex: '', - type: 'datasource', -}; +local datasource = variable.datasource.new( + 'datasource', 'prometheus' +); + +local tsCommonPanelOptions = + variable.query.withDatasourceFromVariable(datasource) + + tsCustom.stacking.withMode('normal') + + tsCustom.withFillOpacity(100) + + tsCustom.withShowPoints('never') + + tsLegend.withShowLegend(false) + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc'); local CPUUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'CPU Utilisation', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local CPUSaturation = // TODO: Is this a useful panel? At least there should be some explanation how load // average relates to the "CPU saturation" in the title. - graphPanel.new( + timeSeriesPanel.new( 'CPU Saturation (Load1 per CPU)', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local memoryUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'Memory Utilisation', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local memorySaturation = - graphPanel.new( + timeSeriesPanel.new( 'Memory Saturation (Major Page Faults)', - datasource='$datasource', - span=6, - format='rds', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('rds'); + +local networkOverrides = tsStandardOptions.withOverrides( + [ + tsStandardOptions.override.byRegexp.new('/Transmit/') + + tsStandardOptions.override.byRegexp.withPropertiesFromOptions( + 
tsCustom.withTransform('negative-Y') + ), + ] +); local networkUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'Network Utilisation (Bytes Receive/Transmit)', - datasource='$datasource', - span=6, - format='Bps', - stack=true, - fill=10, - legend_show=false, ) - .addSeriesOverride({ alias: '/Receive/', stack: 'A' }) - .addSeriesOverride({ alias: '/Transmit/', stack: 'B', transform: 'negative-Y' }) - { tooltip+: { sort: 2 } }; + + tsCommonPanelOptions + + tsStandardOptions.withUnit('Bps') + + networkOverrides; local networkSaturation = - graphPanel.new( + timeSeriesPanel.new( 'Network Saturation (Drops Receive/Transmit)', - datasource='$datasource', - span=6, - format='Bps', - stack=true, - fill=10, - legend_show=false, ) - .addSeriesOverride({ alias: '/ Receive/', stack: 'A' }) - .addSeriesOverride({ alias: '/ Transmit/', stack: 'B', transform: 'negative-Y' }) - { tooltip+: { sort: 2 } }; + + tsCommonPanelOptions + + tsStandardOptions.withUnit('Bps') + + networkOverrides; local diskIOUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'Disk IO Utilisation', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local diskIOSaturation = - graphPanel.new( + timeSeriesPanel.new( 'Disk IO Saturation', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local diskSpaceUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'Disk Space Utilisation', - datasource='$datasource', - span=12, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); { - _clusterTemplate:: template.new( - name='cluster', - datasource='$datasource', - 
query='label_values(node_time_seconds, %s)' % $._config.clusterLabel, - current='', - hide=if $._config.showMultiCluster then '' else '2', - refresh=2, - includeAll=false, - sort=1 - ), + _clusterVariable:: + variable.query.new('cluster') + + variable.query.withDatasourceFromVariable(datasource) + + variable.query.queryTypes.withLabelValues( + $._config.clusterLabel, + 'node_time_seconds', + ) + + (if $._config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing()) + + variable.query.refresh.onTime() + + variable.query.selectionOptions.withIncludeAll(false) + + variable.query.withSort(asc=true), grafanaDashboards+:: { 'node-rsrc-use.json': - dashboard.new( '%sUSE Method / Node' % $._config.dashboardNamePrefix, - time_from='now-1h', - tags=($._config.dashboardTags), - timezone='utc', - refresh='30s', - graphTooltip='shared_crosshair', - uid=std.md5('node-rsrc-use.json') ) - .addTemplate(datasourceTemplate) - .addTemplate($._clusterTemplate) - .addTemplate( - template.new( + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid(std.md5('node-rsrc-use.json')) + + dashboard.withVariables([ + datasource, + $._clusterVariable, + variable.query.new('instance') + + variable.query.withDatasourceFromVariable(datasource) + + variable.query.queryTypes.withLabelValues( 'instance', - '$datasource', - 'label_values(node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}, instance)' % $._config, - refresh='time', - sort=1 - ) - ) - .addRow( - row.new('CPU') - .addPanel(CPUUtilisation.addTarget(prometheus.target('instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Utilisation'))) - 
.addPanel(CPUSaturation.addTarget(prometheus.target('instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Saturation'))) - ) - .addRow( - row.new('Memory') - .addPanel(memoryUtilisation.addTarget(prometheus.target('instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Utilisation'))) - .addPanel(memorySaturation.addTarget(prometheus.target('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Major page Faults'))) - ) - .addRow( - row.new('Network') - .addPanel( - networkUtilisation - .addTarget(prometheus.target('instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Receive')) - .addTarget(prometheus.target('instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Transmit')) - ) - .addPanel( - networkSaturation - .addTarget(prometheus.target('instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Receive')) - .addTarget(prometheus.target('instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Transmit')) - ) - ) - .addRow( - row.new('Disk IO') - .addPanel(diskIOUtilisation.addTarget(prometheus.target('instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, 
legendFormat='{{device}}'))) - .addPanel(diskIOSaturation.addTarget(prometheus.target('instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{device}}'))) - ) - .addRow( - row.new('Disk Space') - .addPanel( - diskSpaceUtilisation.addTarget(prometheus.target( - ||| - sort_desc(1 - - ( - max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) - / - max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) - ) != 0 - ) - ||| % $._config, legendFormat='{{device}}' - )) + 'node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config, ) + + variable.query.refresh.onTime() + + variable.query.withSort(asc=true), + ]) + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('CPU') + + row.withPanels([ + CPUUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]), + CPUSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Saturation')]), + ]), + row.new('Memory') + + row.withPanels([ + memoryUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]), + memorySaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 
'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Major page Faults')]), + ]), + row.new('Network') + + row.withPanels([ + networkUtilisation + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'), + prometheus.new('$datasource', 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'), + ]), + networkSaturation + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'), + prometheus.new('$datasource', 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'), + ]), + ]), + row.new('Disk IO') + + row.withPanels([ + diskIOUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]), + diskIOSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]), + ]), + ], panelWidth=12, 
panelHeight=7) + + grafana.util.grid.makeGrid([ + row.new('Disk Space') + + row.withPanels([ + diskSpaceUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sort_desc(1 - + ( + max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) + / + max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) + ) != 0 + ) + ||| % $._config + ) + prometheus.withLegendFormat('{{device}}'), + ]), + ]), + ], panelWidth=24, panelHeight=7, startY=34), ), - 'node-cluster-rsrc-use.json': dashboard.new( '%sUSE Method / Cluster' % $._config.dashboardNamePrefix, - time_from='now-1h', - tags=($._config.dashboardTags), - timezone='utc', - refresh='30s', - graphTooltip='shared_crosshair', - uid=std.md5('node-cluster-rsrc-use.json') ) - .addTemplate(datasourceTemplate) - .addTemplate($._clusterTemplate) - .addRow( - row.new('CPU') - .addPanel( - CPUUtilisation - .addTarget(prometheus.target( - ||| - (( - instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - * - instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - ) != 0 ) - / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ||| % $._config, legendFormat='{{ instance }}' - )) - ) - .addPanel( - CPUSaturation - .addTarget(prometheus.target( - ||| - ( - instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config, legendFormat='{{instance}}' - )) - ) - ) - .addRow( - row.new('Memory') - .addPanel( - memoryUtilisation - .addTarget(prometheus.target( - ||| - ( - instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, 
%(clusterLabel)s="$cluster"} - / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config, legendFormat='{{instance}}', - )) - ) - .addPanel(memorySaturation.addTarget(prometheus.target('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config, legendFormat='{{instance}}'))) - ) - .addRow( - row.new('Network') - .addPanel( - networkUtilisation - .addTarget(prometheus.target('instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Receive')) - .addTarget(prometheus.target('instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Transmit')) - ) - .addPanel( - networkSaturation - .addTarget(prometheus.target('instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Receive')) - .addTarget(prometheus.target('instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Transmit')) - ) - ) - .addRow( - row.new('Disk IO') - .addPanel( - diskIOUtilisation - .addTarget(prometheus.target( - ||| - ( - instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config, legendFormat='{{instance}} {{device}}' - )) - ) - .addPanel( - diskIOSaturation - .addTarget(prometheus.target( - ||| - ( + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + 
dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid(std.md5('node-cluster-rsrc-use.json')) + + dashboard.withVariables([ + datasource, + $._clusterVariable, + variable.query.withDatasourceFromVariable(datasource) + + variable.query.refresh.onTime() + + variable.query.withSort(asc=true), + ]) + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('CPU') + + row.withPanels([ + CPUUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + (( + instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + * + instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + ) != 0 ) + / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + CPUSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ) != 0 + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + ]), + row.new('Memory') + + row.withPanels([ + memoryUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ) != 0 + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + memorySaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config + ) + 
prometheus.withLegendFormat('{{ instance }}'), + ]), + ]), + row.new('Network') + + row.withPanels([ + networkUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Receive'), + prometheus.new( + '$datasource', + 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Transmit'), + ]), + networkSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Receive'), + prometheus.new( + '$datasource', + 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Transmit'), + ]), + ]), + row.new('Disk IO') + + row.withPanels([ + diskIOUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }} {{device}}'), + ]), + diskIOSaturation + tsQueryOptions.withTargets([prometheus.new( + '$datasource', + ||| instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, 
%(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config, legendFormat='{{instance}} {{device}}' - )) - ) - ) - .addRow( - row.new('Disk Space') - .addPanel( - diskSpaceUtilisation - .addTarget(prometheus.target( - ||| - sum without (device) ( - max without (fstype, mountpoint) (( - node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} - - - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} - ) != 0) - ) - / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}))) - ||| % $._config, legendFormat='{{instance}}' - )) - ) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }} {{device}}')]), + ]), + ], panelWidth=12, panelHeight=7) + + grafana.util.grid.makeGrid([ + row.new('Disk Space') + + row.withPanels([ + diskSpaceUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum without (device) ( + max without (fstype, mountpoint) (( + node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} + - + node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} + ) != 0) + ) + / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}))) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + ]), + ], panelWidth=24, panelHeight=7, startY=34), ), } + if $._config.showMultiCluster then { 'node-multicluster-rsrc-use.json': dashboard.new( '%sUSE Method / Multi-cluster' % $._config.dashboardNamePrefix, - time_from='now-1h', - tags=($._config.dashboardTags), - timezone='utc', - refresh='30s', - 
graphTooltip='shared_crosshair', - uid=std.md5('node-multicluster-rsrc-use.json') ) - .addTemplate(datasourceTemplate) - .addRow( - row.new('CPU') - .addPanel( - CPUUtilisation - .addTarget(prometheus.target( - ||| - sum( - (( - instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s} - * - instance:node_num_cpu:sum{%(nodeExporterSelector)s} - ) != 0) - / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s})) - ) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - .addPanel( - CPUSaturation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} - / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - ) - .addRow( - row.new('Memory') - .addPanel( - memoryUtilisation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} - / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - .addPanel( - memorySaturation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - ) - .addRow( - row.new('Network') - .addPanel( - networkUtilisation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} Receive' % $._config - )) - .addTarget(prometheus.target( - ||| - sum(( - instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by 
(%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} Transmit' % $._config - )) - ) - .addPanel( - networkSaturation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} Receive' % $._config - )) - .addTarget(prometheus.target( - ||| - sum(( - instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} Transmit' % $._config - )) - ) - ) - .addRow( - row.new('Disk IO') - .addPanel( - diskIOUtilisation - .addTarget(prometheus.target( - ||| - sum(( - instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} - / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s, device) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} {{device}}' % $._config - )) - ) - .addPanel( - diskIOSaturation - .addTarget(prometheus.target( - ||| - sum(( - instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} - / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s, device) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} {{device}}' % $._config - )) - ) - ) - .addRow( - row.new('Disk Space') - .addPanel( - diskSpaceUtilisation - .addTarget(prometheus.target( - ||| - sum ( - sum without (device) ( - max without (fstype, mountpoint, instance, pod) (( - node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - ) != 0) - ) - / scalar(sum(max without (fstype, mountpoint) 
(node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s}))) - ) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid(std.md5('node-multicluster-rsrc-use.json')) + + dashboard.withVariables([ + datasource, + variable.query.withDatasourceFromVariable(datasource) + + variable.query.refresh.onTime() + + variable.query.withSort(asc=true), + ]) + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('CPU') + + row.withPanels([ + CPUUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum( + (( + instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s} + * + instance:node_num_cpu:sum{%(nodeExporterSelector)s} + ) != 0) + / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s})) + ) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + CPUSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} + / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + ]), + row.new('Memory') + + row.withPanels([ + memoryUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} + / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + memorySaturation + tsQueryOptions.withTargets([ + 
prometheus.new( + '$datasource', + ||| + sum(( + instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| + % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + ]), + row.new('Network') + + row.withPanels([ + networkUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'), + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'), + ]), + networkSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'), + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'), + ]), + ]), + row.new('Disk IO') + + row.withPanels([ + diskIOUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s, device) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}'), + ]), + diskIOSaturation + 
tsQueryOptions.withTargets([prometheus.new( + '$datasource', + ||| + sum(( + instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s, device) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}')]), + ]), + + ], panelWidth=12, panelHeight=7) + + grafana.util.grid.makeGrid([ + row.new('Disk Space') + + row.withPanels([ + diskSpaceUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum ( + sum without (device) ( + max without (fstype, mountpoint, instance, pod) (( + node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} + ) != 0) + ) + / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s}))) + ) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + ]), + ], panelWidth=24, panelHeight=7, startY=34), ), } else {}, } diff --git a/docs/node-mixin/jsonnetfile.json b/docs/node-mixin/jsonnetfile.json index 721d4833..2d56d912 100644 --- a/docs/node-mixin/jsonnetfile.json +++ b/docs/node-mixin/jsonnetfile.json @@ -4,20 +4,11 @@ { "source": { "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-latest" } }, - "version": "master" - }, - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet-7.0" - } - }, - "version": "master" + "version": "main" } ], "legacyImports": false diff --git a/docs/node-mixin/lib/prom-mixin.libsonnet b/docs/node-mixin/lib/prom-mixin.libsonnet index 
bd01cfd4..f18c273c 100644 --- a/docs/node-mixin/lib/prom-mixin.libsonnet +++ b/docs/node-mixin/lib/prom-mixin.libsonnet @@ -1,560 +1,535 @@ -local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; local dashboard = grafana.dashboard; -local row = grafana.row; -local prometheus = grafana.prometheus; -local template = grafana.template; -local graphPanel = grafana.graphPanel; -local grafana70 = import 'github.com/grafana/grafonnet-lib/grafonnet-7.0/grafana.libsonnet'; -local gaugePanel = grafana70.panel.gauge; -local table = grafana70.panel.table; +local row = grafana.panel.row; +local prometheus = grafana.query.prometheus; +local variable = dashboard.variable; + +local timeSeriesPanel = grafana.panel.timeSeries; +local tsOptions = timeSeriesPanel.options; +local tsStandardOptions = timeSeriesPanel.standardOptions; +local tsQueryOptions = timeSeriesPanel.queryOptions; +local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom; + +local gaugePanel = grafana.panel.gauge; +local gaugeStep = gaugePanel.standardOptions.threshold.step; + +local table = grafana.panel.table; +local tableStep = table.standardOptions.threshold.step; +local tableOverride = table.standardOptions.override; +local tableTransformation = table.queryOptions.transformation; { new(config=null, platform=null, uid=null):: { - local prometheusDatasourceTemplate = { - current: { - text: 'default', - value: 'default', - }, - hide: 0, - label: 'Data Source', - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: '', - type: 'datasource', - }, + local prometheusDatasourceVariable = variable.datasource.new( + 'datasource', 'prometheus' + ), - local clusterTemplatePrototype = - template.new( - 'cluster', - '$datasource', - '', - hide=if config.showMultiCluster then '' else '2', - refresh='time', - label='Cluster', - ), - local clusterTemplate = - if platform == 
'Darwin' then - clusterTemplatePrototype - { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, %(clusterLabel)s)' % config } - else - clusterTemplatePrototype - { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, %(clusterLabel)s)' % config }, + local clusterVariablePrototype = + variable.query.new('cluster') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + (if config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing()) + + variable.query.refresh.onTime() + + variable.query.generalOptions.withLabel('Cluster'), - local instanceTemplatePrototype = - template.new( - 'instance', - '$datasource', - '', - refresh='time', - label='Instance', - ), - local instanceTemplate = + local clusterVariable = if platform == 'Darwin' then - instanceTemplatePrototype - { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname="Darwin"}, instance)' % config } + clusterVariablePrototype + + variable.query.queryTypes.withLabelValues( + '%(clusterLabel)s' % config, + 'node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}' % config, + ) else - instanceTemplatePrototype - { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname!="Darwin"}, instance)' % config }, + clusterVariablePrototype + + variable.query.queryTypes.withLabelValues( + '%(clusterLabel)s' % config, + 'node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}' % config, + ), + + local instanceVariablePrototype = + variable.query.new('instance') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + variable.query.refresh.onTime() + + variable.query.generalOptions.withLabel('Instance'), + + local instanceVariable = + if platform == 'Darwin' then + instanceVariablePrototype + +
variable.query.queryTypes.withLabelValues( + 'instance', + 'node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname="Darwin"}' % config, + ) + else + instanceVariablePrototype + + variable.query.queryTypes.withLabelValues( + 'instance', + 'node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname!="Darwin"}' % config, + ), local idleCPU = - graphPanel.new( - 'CPU Usage', - datasource='$datasource', - span=6, - format='percentunit', - max=1, - min=0, - stack=true, - ) - .addTarget(prometheus.target( - ||| - ( - (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval]))) - / ignoring(cpu) group_left - count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"}) - ) - ||| % config, - legendFormat='{{cpu}}', - intervalFactor=5, - )), - - local systemLoad = - graphPanel.new( - 'Load Average', - datasource='$datasource', - span=6, - format='short', - min=0, - fill=0, - ) - .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='1m load average')) - .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='5m load average')) - .addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='15m load average')) - .addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config, legendFormat='logical cores')), - - local memoryGraphPanelPrototype = - graphPanel.new( - 'Memory Usage', - datasource='$datasource', - span=9, - format='bytes', - min=0, - ), - local memoryGraph = - if platform == 'Linux' 
then - memoryGraphPanelPrototype { stack: true } - .addTarget(prometheus.target( + timeSeriesPanel.new('CPU Usage') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('percentunit') + + tsCustom.stacking.withMode('normal') + + tsStandardOptions.withMax(1) + + tsStandardOptions.withMin(0) + + tsOptions.tooltip.withMode('multi') + + tsCustom.withFillOpacity(10) + + tsCustom.withShowPoints('never') + + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', ||| ( - node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - - node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - - node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - - node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval]))) + / ignoring(cpu) group_left + count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"}) ) ||| % config, - legendFormat='memory used' - )) - .addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory buffers')) - .addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory cached')) - .addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory free')) + ) + + prometheus.withLegendFormat('{{cpu}}') + + prometheus.withIntervalFactor(5), + ]), + + local 
systemLoad = + timeSeriesPanel.new('Load Average') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('short') + + tsStandardOptions.withMin(0) + + tsCustom.withFillOpacity(0) + + tsCustom.withShowPoints('never') + + tsOptions.tooltip.withMode('multi') + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('1m load average'), + prometheus.new('$datasource', 'node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('5m load average'), + prometheus.new('$datasource', 'node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('15m load average'), + prometheus.new('$datasource', 'count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config) + prometheus.withLegendFormat('logical cores'), + ]), + + local memoryGraphPanelPrototype = + timeSeriesPanel.new('Memory Usage') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('bytes') + + tsStandardOptions.withMin(0) + + tsOptions.tooltip.withMode('multi') + + tsCustom.withFillOpacity(10) + + tsCustom.withShowPoints('never'), + + local memoryGraph = + if platform == 'Linux' then + memoryGraphPanelPrototype + + tsCustom.stacking.withMode('normal') + + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - + node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - + node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - + 
node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + ) + ||| % config, + ) + prometheus.withLegendFormat('memory used'), + prometheus.new('$datasource', 'node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory buffers'), + prometheus.new('$datasource', 'node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory cached'), + prometheus.new('$datasource', 'node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory free'), + ]) else if platform == 'Darwin' then // not useful to stack - memoryGraphPanelPrototype { stack: false } - .addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Physical Memory')) - .addTarget(prometheus.target( - ||| - ( - node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - ) - ||| % config, legendFormat='Memory Used' - )) - .addTarget(prometheus.target( - ||| - ( - node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - ) - ||| % config, legendFormat='App Memory' - )) - .addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", 
%(clusterLabel)s="$cluster"}' % config, legendFormat='Wired Memory')) - .addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Compressed')) + memoryGraphPanelPrototype + + tsCustom.stacking.withMode('none') + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Physical Memory'), + prometheus.new( + '$datasource', + ||| + ( + node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - + node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + + node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + + node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + ) + ||| % config + ) + prometheus.withLegendFormat( + 'Memory Used' + ), + prometheus.new( + '$datasource', + ||| + ( + node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - + node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + ) + ||| % config + ) + prometheus.withLegendFormat( + 'App Memory' + ), + prometheus.new('$datasource', 'node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Wired Memory'), + prometheus.new('$datasource', 'node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Compressed'), + ]) + else if platform == 'AIX' then - memoryGraphPanelPrototype { stack: false } - .addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", 
%(clusterLabel)s="$cluster"}' % config, legendFormat='Physical Memory')) - .addTarget(prometheus.target( - ||| - ( - node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - ) - ||| % config, legendFormat='Memory Used' - )), + memoryGraphPanelPrototype + + tsCustom.stacking.withMode('none') + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Physical Memory'), + prometheus.new( + '$datasource', + ||| + ( + node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - + node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + ) + ||| % config + ) + prometheus.withLegendFormat('Memory Used'), + ]), // NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout. 
local memoryGaugePanelPrototype = - gaugePanel.new( - title='Memory Usage', - datasource='$datasource', - ) - .addThresholdStep('rgba(50, 172, 45, 0.97)') - .addThresholdStep('rgba(237, 129, 40, 0.89)', 80) - .addThresholdStep('rgba(245, 54, 54, 0.9)', 90) - .setFieldConfig(max=100, min=0, unit='percent') - + { - span: 3, - }, + gaugePanel.new('Memory Usage') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + gaugePanel.standardOptions.thresholds.withSteps([ + gaugeStep.withColor('rgba(50, 172, 45, 0.97)'), + gaugeStep.withColor('rgba(237, 129, 40, 0.89)') + gaugeStep.withValue(80), + gaugeStep.withColor('rgba(245, 54, 54, 0.9)') + gaugeStep.withValue(90), + ]) + + gaugePanel.standardOptions.withMax(100) + + gaugePanel.standardOptions.withMin(0) + + gaugePanel.standardOptions.withUnit('percent'), local memoryGauge = if platform == 'Linux' then memoryGaugePanelPrototype - - .addTarget(prometheus.target( - ||| - 100 - - ( - avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) / - avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - * 100 - ) - ||| % config, - )) + + gaugePanel.queryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + 100 - + ( + avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) / + avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + * 100 + ) + ||| % config, + ), + ]) else if platform == 'Darwin' then memoryGaugePanelPrototype - .addTarget(prometheus.target( - ||| - ( - ( - avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - - avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + - avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", 
%(clusterLabel)s="$cluster"}) + - avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - ) / - avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - ) - * - 100 - ||| % config - )) + + gaugePanel.queryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + ( + avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - + avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + + avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + + avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + ) / + avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + ) + * + 100 + ||| % config + ), + ]) + else if platform == 'AIX' then memoryGaugePanelPrototype - .addTarget(prometheus.target( - ||| - 100 - - ( - avg(node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) / - avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - * 100 - ) - ||| % config - )), + + gaugePanel.queryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + 100 - + ( + avg(node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) / + avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + * 100 + ) + ||| % config + ), + ]), local diskIO = - graphPanel.new( - 'Disk I/O', - datasource='$datasource', - span=6, - min=0, - fill=0, - ) - // TODO: Does it make sense to have those three in the same panel? 
- .addTarget(prometheus.target( - 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config, - legendFormat='{{device}} read', - intervalFactor=1, - )) - .addTarget(prometheus.target( - 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config, - legendFormat='{{device}} written', - intervalFactor=1, - )) - .addTarget(prometheus.target( - 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config, - legendFormat='{{device}} io time', - intervalFactor=1, - )) + - { - seriesOverrides: [ - { - alias: '/ read| written/', - yaxis: 1, - }, - { - alias: '/ io time/', - yaxis: 2, - }, - ], - yaxes: [ - self.yaxe(format='Bps'), - self.yaxe(format='percentunit'), - ], - }, + timeSeriesPanel.new('Disk I/O') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withMin(0) + + tsCustom.withFillOpacity(0) + + tsCustom.withShowPoints('never') + + tsOptions.tooltip.withMode('multi') + + tsQueryOptions.withTargets([ + // TODO: Does it make sense to have those three in the same panel? 
+ prometheus.new('$datasource', 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config) + + prometheus.withLegendFormat('{{device}} read') + + prometheus.withIntervalFactor(1), + prometheus.new('$datasource', 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config) + + prometheus.withLegendFormat('{{device}} written') + + prometheus.withIntervalFactor(1), + prometheus.new('$datasource', 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config) + + prometheus.withLegendFormat('{{device}} io time') + + prometheus.withIntervalFactor(1), + ]) + + tsStandardOptions.withOverrides( + [ + tsStandardOptions.override.byRegexp.new('/ read| written/') + + tsStandardOptions.override.byRegexp.withPropertiesFromOptions( + tsStandardOptions.withUnit('Bps') + ), + tsStandardOptions.override.byRegexp.new('/ io time/') + + tsStandardOptions.override.byRegexp.withPropertiesFromOptions(tsStandardOptions.withUnit('percentunit')), + ] + ), local diskSpaceUsage = - table.new( - title='Disk Space Usage', - datasource='$datasource', - ) - .setFieldConfig(unit='decbytes') - .addThresholdStep(color='green', value=null) - .addThresholdStep(color='yellow', value=0.8) - .addThresholdStep(color='red', value=0.9) - .addTarget(prometheus.target( - ||| - max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) - ||| % config, - legendFormat='', - instant=true, - format='table' - )) - .addTarget(prometheus.target( - ||| - max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, 
%(fsMountpointSelector)s}) - ||| % config, - legendFormat='', - instant=true, - format='table' - )) - .addOverride( - matcher={ - id: 'byName', - options: 'Mounted on', - }, - properties=[ - { - id: 'custom.width', - value: 260, - }, - ], - ) - .addOverride( - matcher={ - id: 'byName', - options: 'Size', - }, - properties=[ - - { - id: 'custom.width', - value: 93, - }, - - ], - ) - .addOverride( - matcher={ - id: 'byName', - options: 'Used', - }, - properties=[ - { - id: 'custom.width', - value: 72, - }, - ], - ) - .addOverride( - matcher={ - id: 'byName', - options: 'Available', - }, - properties=[ - { - id: 'custom.width', - value: 88, - }, - ], - ) - - .addOverride( - matcher={ - id: 'byName', - options: 'Used, %', - }, - properties=[ - { - id: 'unit', - value: 'percentunit', - }, - { - id: 'custom.displayMode', - value: 'gradient-gauge', - }, - { - id: 'max', - value: 1, - }, - { - id: 'min', - value: 0, - }, + table.new('Disk Space Usage') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + table.standardOptions.withUnit('decbytes') + + table.standardOptions.thresholds.withSteps( + [ + tableStep.withColor('green'), + tableStep.withColor('yellow') + tableStep.withValue(0.8), + tableStep.withColor('red') + tableStep.withValue(0.9), ] ) - + { span: 6 } - + { - transformations: [ + + table.queryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) + ||| % config + ) + + prometheus.withLegendFormat('') + + prometheus.withInstant() + + prometheus.withFormat('table'), + prometheus.new( + '$datasource', + ||| + max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) + ||| % config + ) + + prometheus.withLegendFormat('') + + prometheus.withInstant() + +
prometheus.withFormat('table'), + ]) + + table.standardOptions.withOverrides([ + tableOverride.byName.new('Mounted on') + + tableOverride.byName.withProperty('custom.width', 260), + tableOverride.byName.new('Size') + + tableOverride.byName.withProperty('custom.width', 93), + tableOverride.byName.new('Used') + + tableOverride.byName.withProperty('custom.width', 72), + tableOverride.byName.new('Available') + + tableOverride.byName.withProperty('custom.width', 88), + tableOverride.byName.new('Used, %') + + tableOverride.byName.withProperty('unit', 'percentunit') + + tableOverride.byName.withPropertiesFromOptions( + table.fieldConfig.defaults.custom.withCellOptions( + { type: 'gauge' }, + ) + ) + + tableOverride.byName.withProperty('max', 1) + + tableOverride.byName.withProperty('min', 0), + ]) + + table.queryOptions.withTransformations([ + tableTransformation.withId('groupBy') + + tableTransformation.withOptions( { - id: 'groupBy', - options: { - fields: { - 'Value #A': { - aggregations: [ - 'lastNotNull', - ], - operation: 'aggregate', - }, - 'Value #B': { - aggregations: [ - 'lastNotNull', - ], - operation: 'aggregate', - }, - mountpoint: { - aggregations: [], - operation: 'groupby', - }, + fields: { + 'Value #A': { + aggregations: [ + 'lastNotNull', + ], + operation: 'aggregate', + }, + 'Value #B': { + aggregations: [ + 'lastNotNull', + ], + operation: 'aggregate', + }, + mountpoint: { + aggregations: [], + operation: 'groupby', }, }, - }, + } + ), + tableTransformation.withId('merge'), + tableTransformation.withId('calculateField') + + tableTransformation.withOptions( { - id: 'merge', - options: {}, - }, - { - id: 'calculateField', - options: { - alias: 'Used', - binary: { - left: 'Value #A (lastNotNull)', - operator: '-', - reducer: 'sum', - right: 'Value #B (lastNotNull)', - }, - mode: 'binary', - reduce: { - reducer: 'sum', - }, + alias: 'Used', + binary: { + left: 'Value #A (lastNotNull)', + operator: '-', + reducer: 'sum', + right: 'Value #B (lastNotNull)', 
}, - }, - { - id: 'calculateField', - options: { - alias: 'Used, %', - binary: { - left: 'Used', - operator: '/', - reducer: 'sum', - right: 'Value #A (lastNotNull)', - }, - mode: 'binary', - reduce: { - reducer: 'sum', - }, + mode: 'binary', + reduce: { + reducer: 'sum', }, - }, + } + ), + tableTransformation.withId('calculateField') + + tableTransformation.withOptions( { - id: 'organize', - options: { - excludeByName: {}, - indexByName: {}, - renameByName: { - 'Value #A (lastNotNull)': 'Size', - 'Value #B (lastNotNull)': 'Available', - mountpoint: 'Mounted on', + alias: 'Used, %', + binary: { + left: 'Used', + operator: '/', + reducer: 'sum', + right: 'Value #A (lastNotNull)', + }, + mode: 'binary', + reduce: { + reducer: 'sum', + }, + } + ), + tableTransformation.withId('organize') + + tableTransformation.withOptions( + { + excludeByName: {}, + indexByName: {}, + renameByName: { + 'Value #A (lastNotNull)': 'Size', + 'Value #B (lastNotNull)': 'Available', + mountpoint: 'Mounted on', + }, + } + ), + tableTransformation.withId('sortBy') + + tableTransformation.withOptions( + { + fields: {}, + sort: [ + { + field: 'Mounted on', }, - }, - }, - { - id: 'sortBy', - options: { - fields: {}, - sort: [ - { - field: 'Mounted on', - }, - ], - }, - }, - ], - }, + ], + } + ), + ]), local networkReceived = - graphPanel.new( - 'Network Received', - description='Network received (bits/s)', - datasource='$datasource', - span=6, - format='bps', - min=0, - fill=0, - ) - .addTarget(prometheus.target( - 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config, - legendFormat='{{device}}', - intervalFactor=1, - )), + timeSeriesPanel.new('Network Received') + + timeSeriesPanel.panelOptions.withDescription('Network received (bits/s)') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('bps') + + tsStandardOptions.withMin(0) + + 
tsCustom.withFillOpacity(0) + + tsCustom.withShowPoints('never') + + tsOptions.tooltip.withMode('multi') + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config) + + prometheus.withLegendFormat('{{device}}') + + prometheus.withIntervalFactor(1), + ]), local networkTransmitted = - graphPanel.new( - 'Network Transmitted', - description='Network transmitted (bits/s)', - datasource='$datasource', - span=6, - format='bps', - min=0, - fill=0, - ) - .addTarget(prometheus.target( - 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config, - legendFormat='{{device}}', - intervalFactor=1, - )), + timeSeriesPanel.new('Network Transmitted') + + timeSeriesPanel.panelOptions.withDescription('Network transmitted (bits/s)') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('bps') + + tsStandardOptions.withMin(0) + + tsCustom.withFillOpacity(0) + tsCustom.withShowPoints('never') + + tsOptions.tooltip.withMode('multi') + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config) + + prometheus.withLegendFormat('{{device}}') + + prometheus.withIntervalFactor(1), + ]), local cpuRow = row.new('CPU') - .addPanel(idleCPU) - .addPanel(systemLoad), + + row.withPanels([ + idleCPU, + systemLoad, + ]), - local memoryRow = - row.new('Memory') - .addPanel(memoryGraph) - .addPanel(memoryGauge), + local memoryRow = [ + row.new('Memory') + row.gridPos.withY(8), + memoryGraph + row.gridPos.withX(0) + row.gridPos.withY(9) + row.gridPos.withH(7) + row.gridPos.withW(18), + memoryGauge + row.gridPos.withX(18) + row.gridPos.withY(9) +
row.gridPos.withH(7) + row.gridPos.withW(6), + ], local diskRow = row.new('Disk') - .addPanel(diskIO) - .addPanel(diskSpaceUsage), + + row.withPanels([ + diskIO, + diskSpaceUsage, + ]), local networkRow = row.new('Network') - .addPanel(networkReceived) - .addPanel(networkTransmitted), + + row.withPanels([ + networkReceived, + networkTransmitted, + ]), - local rows = - [ + local panels = + grafana.util.grid.makeGrid([ cpuRow, - memoryRow, + ], panelWidth=12, panelHeight=7) + + memoryRow + + grafana.util.grid.makeGrid([ diskRow, networkRow, - ], + ], panelWidth=12, panelHeight=7, startY=18), - local templates = + local variables = [ - prometheusDatasourceTemplate, - clusterTemplate, - instanceTemplate, + prometheusDatasourceVariable, + clusterVariable, + instanceVariable, ], - dashboard: if platform == 'Linux' then dashboard.new( '%sNodes' % config.dashboardNamePrefix, - time_from='now-1h', - tags=(config.dashboardTags), - timezone='utc', - refresh='30s', - uid=std.md5(uid), - graphTooltip='shared_crosshair' ) - .addTemplates(templates) - .addRows(rows) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags(config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.withUid(std.md5(uid)) + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withVariables(variables) + + dashboard.withPanels(panels) else if platform == 'Darwin' then dashboard.new( '%sMacOS' % config.dashboardNamePrefix, - time_from='now-1h', - tags=(config.dashboardTags), - timezone='utc', - refresh='30s', - uid=std.md5(uid), - graphTooltip='shared_crosshair' ) - .addTemplates(templates) - .addRows(rows) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags(config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.withUid(std.md5(uid)) + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withVariables(variables) + + dashboard.withPanels(panels) else if platform == 'AIX' then 
dashboard.new( '%sAIX' % config.dashboardNamePrefix, - time_from='now-1h', - tags=(config.dashboardTags), - timezone='utc', - refresh='30s', - uid=std.md5(uid), - graphTooltip='shared_crosshair' ) - .addTemplates(templates) - .addRows(rows), + + dashboard.time.withFrom('now-1h') + + dashboard.withTags(config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.withUid(std.md5(uid)) + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withVariables(variables) + + dashboard.withPanels(panels), }, }