Migrate dashboards to new grafonnet library (#3147)

Migrated away from the deprecated grafonnet-lib to the current Grafonnet library. This replaces the panels built on AngularJS, which are disabled by default in Grafana 11 and will be unsupported in Grafana 12.

Fixes #3046
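For reviewers unfamiliar with the new library: the shape of the change is the same in every panel. The old graphPanel.new(...) builders (which emit Angular graph panels) become timeSeriesPanel.new(...) plus a sum of option mixins, and prometheus.target(...) becomes prometheus.new(...). A minimal sketch of the pattern, assuming the vendored grafonnet-latest package; the query string here is illustrative rather than copied from the dashboards:

local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
local timeSeriesPanel = grafana.panel.timeSeries;
local prometheus = grafana.query.prometheus;

// Old style (removed): graphPanel.new('CPU Utilisation', datasource='$datasource',
//                                     format='percentunit', stack=true, fill=10, legend_show=false)
// New style: compose the panel from small option mixins.
timeSeriesPanel.new('CPU Utilisation')
+ timeSeriesPanel.standardOptions.withUnit('percentunit')
+ timeSeriesPanel.fieldConfig.defaults.custom.stacking.withMode('normal')
+ timeSeriesPanel.options.legend.withShowLegend(false)
+ timeSeriesPanel.queryOptions.withTargets([
  // Illustrative recording-rule query; the real dashboards template these from $._config.
  prometheus.new('$datasource', 'instance:node_cpu_utilisation:rate5m != 0')
  + prometheus.withLegendFormat('Utilisation'),
])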

---------

Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com>
Tom authored on 2024-12-19 15:49:22 +00:00; committed by GitHub
parent ff97e35a71
commit d0c1d00d18
3 changed files with 871 additions and 894 deletions


@@ -1,201 +1,178 @@
local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';

local dashboard = grafana.dashboard;
local variable = dashboard.variable;
local row = grafana.panel.row;
local prometheus = grafana.query.prometheus;

local timeSeriesPanel = grafana.panel.timeSeries;
local tsOptions = timeSeriesPanel.options;
local tsStandardOptions = timeSeriesPanel.standardOptions;
local tsQueryOptions = timeSeriesPanel.queryOptions;
local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom;
local tsLegend = tsOptions.legend;

local c = import '../config.libsonnet';

local datasource = variable.datasource.new(
  'datasource', 'prometheus'
);

local tsCommonPanelOptions =
  variable.query.withDatasourceFromVariable(datasource)
  + tsCustom.stacking.withMode('normal')
  + tsCustom.withFillOpacity(100)
  + tsCustom.withShowPoints('never')
  + tsLegend.withShowLegend(false)
  + tsOptions.tooltip.withMode('multi')
  + tsOptions.tooltip.withSort('desc');

local CPUUtilisation =
  timeSeriesPanel.new(
    'CPU Utilisation',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('percentunit');

local CPUSaturation =
  // TODO: Is this a useful panel? At least there should be some explanation how load
  // average relates to the "CPU saturation" in the title.
  timeSeriesPanel.new(
    'CPU Saturation (Load1 per CPU)',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('percentunit');

local memoryUtilisation =
  timeSeriesPanel.new(
    'Memory Utilisation',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('percentunit');

local memorySaturation =
  timeSeriesPanel.new(
    'Memory Saturation (Major Page Faults)',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('rds');

local networkOverrides = tsStandardOptions.withOverrides(
  [
    tsStandardOptions.override.byRegexp.new('/Transmit/')
    + tsStandardOptions.override.byRegexp.withPropertiesFromOptions(
      tsCustom.withTransform('negative-Y')
    ),
  ]
);

local networkUtilisation =
  timeSeriesPanel.new(
    'Network Utilisation (Bytes Receive/Transmit)',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('Bps')
  + networkOverrides;

local networkSaturation =
  timeSeriesPanel.new(
    'Network Saturation (Drops Receive/Transmit)',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('Bps')
  + networkOverrides;

local diskIOUtilisation =
  timeSeriesPanel.new(
    'Disk IO Utilisation',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('percentunit');

local diskIOSaturation =
  timeSeriesPanel.new(
    'Disk IO Saturation',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('percentunit');

local diskSpaceUtilisation =
  timeSeriesPanel.new(
    'Disk Space Utilisation',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('percentunit');

{
  _clusterVariable::
    variable.query.new('cluster')
    + variable.query.withDatasourceFromVariable(datasource)
    + variable.query.queryTypes.withLabelValues(
      $._config.clusterLabel,
      'node_time_seconds',
    )
    + (if $._config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing())
    + variable.query.refresh.onTime()
    + variable.query.selectionOptions.withIncludeAll(false)
    + variable.query.withSort(asc=true),

  grafanaDashboards+:: {
    'node-rsrc-use.json':
      dashboard.new(
        '%sUSE Method / Node' % $._config.dashboardNamePrefix,
      )
      + dashboard.time.withFrom('now-1h')
      + dashboard.withTags($._config.dashboardTags)
      + dashboard.withTimezone('utc')
      + dashboard.withRefresh('30s')
      + dashboard.graphTooltip.withSharedCrosshair()
      + dashboard.withUid(std.md5('node-rsrc-use.json'))
      + dashboard.withVariables([
        datasource,
        $._clusterVariable,
        variable.query.new('instance')
        + variable.query.withDatasourceFromVariable(datasource)
        + variable.query.queryTypes.withLabelValues(
          'instance',
          'node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config,
        )
        + variable.query.refresh.onTime()
        + variable.query.withSort(asc=true),
      ])
      + dashboard.withPanels(
        grafana.util.grid.makeGrid([
          row.new('CPU')
          + row.withPanels([
            CPUUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]),
            CPUSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Saturation')]),
          ]),
          row.new('Memory')
          + row.withPanels([
            memoryUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]),
            memorySaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Major page Faults')]),
          ]),
          row.new('Network')
          + row.withPanels([
            networkUtilisation + tsQueryOptions.withTargets([
              prometheus.new('$datasource', 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'),
              prometheus.new('$datasource', 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'),
            ]),
            networkSaturation + tsQueryOptions.withTargets([
              prometheus.new('$datasource', 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'),
              prometheus.new('$datasource', 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'),
            ]),
          ]),
          row.new('Disk IO')
          + row.withPanels([
            diskIOUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]),
            diskIOSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]),
          ]),
        ], panelWidth=12, panelHeight=7)
        + grafana.util.grid.makeGrid([
          row.new('Disk Space')
          + row.withPanels([
            diskSpaceUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sort_desc(1 -
                    (
@@ -204,28 +181,36 @@ local diskSpaceUtilisation =
                      max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"})
                    ) != 0
                  )
                ||| % $._config
              ) + prometheus.withLegendFormat('{{device}}'),
            ]),
          ]),
        ], panelWidth=24, panelHeight=7, startY=34),
      ),

    'node-cluster-rsrc-use.json':
      dashboard.new(
        '%sUSE Method / Cluster' % $._config.dashboardNamePrefix,
      )
      + dashboard.time.withFrom('now-1h')
      + dashboard.withTags($._config.dashboardTags)
      + dashboard.withTimezone('utc')
      + dashboard.withRefresh('30s')
      + dashboard.graphTooltip.withSharedCrosshair()
      + dashboard.withUid(std.md5('node-cluster-rsrc-use.json'))
      + dashboard.withVariables([
        datasource,
        $._clusterVariable,
        variable.query.withDatasourceFromVariable(datasource)
        + variable.query.refresh.onTime()
        + variable.query.withSort(asc=true),
      ])
      + dashboard.withPanels(
        grafana.util.grid.makeGrid([
          row.new('CPU')
          + row.withPanels([
            CPUUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  ((
                    instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
@@ -233,79 +218,90 @@ local diskSpaceUtilisation =
                    instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
                  ) != 0 )
                  / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
                ||| % $._config
              ) + prometheus.withLegendFormat('{{ instance }}'),
            ]),
            CPUSaturation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  (
                    instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
                    / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
                  ) != 0
                ||| % $._config
              ) + prometheus.withLegendFormat('{{ instance }}'),
            ]),
          ]),
          row.new('Memory')
          + row.withPanels([
            memoryUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  (
                    instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
                    / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
                  ) != 0
                ||| % $._config
              ) + prometheus.withLegendFormat('{{ instance }}'),
            ]),
            memorySaturation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config
              ) + prometheus.withLegendFormat('{{ instance }}'),
            ]),
          ]),
          row.new('Network')
          + row.withPanels([
            networkUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config
              ) + prometheus.withLegendFormat('{{ instance }} Receive'),
              prometheus.new(
                '$datasource',
                'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config
              ) + prometheus.withLegendFormat('{{ instance }} Transmit'),
            ]),
            networkSaturation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config
              ) + prometheus.withLegendFormat('{{ instance }} Receive'),
              prometheus.new(
                '$datasource',
                'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config
              ) + prometheus.withLegendFormat('{{ instance }} Transmit'),
            ]),
          ]),
          row.new('Disk IO')
          + row.withPanels([
            diskIOUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
                  / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
                ||| % $._config
              ) + prometheus.withLegendFormat('{{ instance }} {{device}}'),
            ]),
            diskIOSaturation + tsQueryOptions.withTargets([prometheus.new(
              '$datasource',
              |||
                instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
                / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
              ||| % $._config
            ) + prometheus.withLegendFormat('{{ instance }} {{device}}')]),
          ]),
        ], panelWidth=12, panelHeight=7)
        + grafana.util.grid.makeGrid([
          row.new('Disk Space')
          + row.withPanels([
            diskSpaceUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum without (device) (
                    max without (fstype, mountpoint) ((
@@ -315,28 +311,37 @@ local diskSpaceUtilisation =
                    ) != 0)
                  )
                  / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"})))
                ||| % $._config
              ) + prometheus.withLegendFormat('{{ instance }}'),
            ]),
          ]),
        ], panelWidth=24, panelHeight=7, startY=34),
      ),
  } +
  if $._config.showMultiCluster then {
    'node-multicluster-rsrc-use.json':
      dashboard.new(
        '%sUSE Method / Multi-cluster' % $._config.dashboardNamePrefix,
      )
      + dashboard.time.withFrom('now-1h')
      + dashboard.withTags($._config.dashboardTags)
      + dashboard.withTimezone('utc')
      + dashboard.withRefresh('30s')
      + dashboard.graphTooltip.withSharedCrosshair()
      + dashboard.withUid(std.md5('node-multicluster-rsrc-use.json'))
      + dashboard.withVariables([
        datasource,
        variable.query.withDatasourceFromVariable(datasource)
        + variable.query.refresh.onTime()
        + variable.query.withSort(asc=true),
      ])
      + dashboard.withPanels(
        grafana.util.grid.makeGrid([
          row.new('CPU')
          + row.withPanels([
            CPUUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum(
                    ((
@@ -346,112 +351,116 @@ local diskSpaceUtilisation =
                    ) != 0)
                    / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s}))
                  ) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
            ]),
            CPUSaturation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}
                    / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}))
                  ) != 0) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
            ]),
          ]),
          row.new('Memory')
          + row.withPanels([
            memoryUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}
                    / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}))
                  ) != 0) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
            ]),
            memorySaturation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s}
                  ) != 0) by (%(clusterLabel)s)
                |||
                % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
            ]),
          ]),
          row.new('Network')
          + row.withPanels([
            networkUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}
                  ) != 0) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'),
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}
                  ) != 0) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'),
            ]),
            networkSaturation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}
                  ) != 0) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'),
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}
                  ) != 0) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'),
            ]),
          ]),
          row.new('Disk IO')
          + row.withPanels([
            diskIOUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}
                    / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}))
                  ) != 0) by (%(clusterLabel)s, device)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}'),
            ]),
            diskIOSaturation + tsQueryOptions.withTargets([prometheus.new(
              '$datasource',
              |||
                sum((
                  instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}
                  / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}))
                ) != 0) by (%(clusterLabel)s, device)
              ||| % $._config
            ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}')]),
          ]),
        ], panelWidth=12, panelHeight=7)
        + grafana.util.grid.makeGrid([
          row.new('Disk Space')
          + row.withPanels([
            diskSpaceUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum (
                    sum without (device) (
@@ -461,9 +470,11 @@ local diskSpaceUtilisation =
                    )
                    / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s})))
                  ) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
            ]),
          ]),
        ], panelWidth=24, panelHeight=7, startY=34),
      ),
  } else {},
}
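The new library has no addRow()/addPanel() chaining, so the dashboards above hand whole rows to grafana.util.grid.makeGrid, which fills in each panel's gridPos from a fixed panel width and height; a second grid is concatenated at an explicit startY for the full-width Disk Space row. A rough sketch of that layout call with placeholder panels (only the makeGrid parameters are the ones actually used above):

local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
local row = grafana.panel.row;
local timeSeriesPanel = grafana.panel.timeSeries;

// Two half-width (12 of 24 grid units) panels per row, 7 units high.
grafana.util.grid.makeGrid([
  row.new('CPU')
  + row.withPanels([
    timeSeriesPanel.new('CPU Utilisation'),
    timeSeriesPanel.new('CPU Saturation (Load1 per CPU)'),
  ]),
], panelWidth=12, panelHeight=7)
// A second grid of full-width panels, started below the first one.
+ grafana.util.grid.makeGrid([
  row.new('Disk Space')
  + row.withPanels([timeSeriesPanel.new('Disk Space Utilisation')]),
], panelWidth=24, panelHeight=7, startY=34)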


@@ -4,20 +4,11 @@
    {
      "source": {
        "git": {
          "remote": "https://github.com/grafana/grafonnet.git",
          "subdir": "gen/grafonnet-latest"
        }
      },
      "version": "main"
    }
  ],
  "legacyImports": false


@@ -1,76 +1,85 @@
local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';

local dashboard = grafana.dashboard;
local row = grafana.panel.row;
local prometheus = grafana.query.prometheus;
local variable = dashboard.variable;

local timeSeriesPanel = grafana.panel.timeSeries;
local tsOptions = timeSeriesPanel.options;
local tsStandardOptions = timeSeriesPanel.standardOptions;
local tsQueryOptions = timeSeriesPanel.queryOptions;
local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom;

local gaugePanel = grafana.panel.gauge;
local gaugeStep = gaugePanel.standardOptions.threshold.step;

local table = grafana.panel.table;
local tableStep = table.standardOptions.threshold.step;
local tableOverride = table.standardOptions.override;
local tableTransformation = table.queryOptions.transformation;

{
  new(config=null, platform=null, uid=null):: {

    local prometheusDatasourceVariable = variable.datasource.new(
      'datasource', 'prometheus'
    ),

    local clusterVariablePrototype =
      variable.query.new('cluster')
      + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
      + (if config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing())
      + variable.query.refresh.onTime()
      + variable.query.generalOptions.withLabel('Cluster'),

    local clusterVariable =
      if platform == 'Darwin' then
        clusterVariablePrototype
        + variable.query.queryTypes.withLabelValues(
          ' %(clusterLabel)s' % config,
          'node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}' % config,
        )
      else
        clusterVariablePrototype
        + variable.query.queryTypes.withLabelValues(
          '%(clusterLabel)s' % config,
          'node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}' % config,
        ),

    local instanceVariablePrototype =
      variable.query.new('instance')
      + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
      + variable.query.refresh.onTime()
      + variable.query.generalOptions.withLabel('Instance'),

    local instanceVariable =
      if platform == 'Darwin' then
        instanceVariablePrototype
        + variable.query.queryTypes.withLabelValues(
          'instance',
          'node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname="Darwin"}' % config,
        )
      else
        instanceVariablePrototype
        + variable.query.queryTypes.withLabelValues(
          'instance',
          'node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname!="Darwin"}' % config,
        ),

    local idleCPU =
      timeSeriesPanel.new('CPU Usage')
      + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
      + tsStandardOptions.withUnit('percentunit')
      + tsCustom.stacking.withMode('normal')
      + tsStandardOptions.withMax(1)
      + tsStandardOptions.withMin(0)
      + tsOptions.tooltip.withMode('multi')
      + tsCustom.withFillOpacity(10)
      + tsCustom.withShowPoints('never')
      + tsQueryOptions.withTargets([
        prometheus.new(
          '$datasource',
          |||
            (
              (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval])))
@@ -78,36 +87,42 @@ local table = grafana70.panel.table;
              count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"})
            )
          ||| % config,
        )
        + prometheus.withLegendFormat('{{cpu}}')
        + prometheus.withIntervalFactor(5),
      ]),

    local systemLoad =
      timeSeriesPanel.new('Load Average')
      + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
      + tsStandardOptions.withUnit('short')
      + tsStandardOptions.withMin(0)
      + tsCustom.withFillOpacity(0)
      + tsCustom.withShowPoints('never')
      + tsOptions.tooltip.withMode('multi')
      + tsQueryOptions.withTargets([
        prometheus.new('$datasource', 'node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('1m load average'),
        prometheus.new('$datasource', 'node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('5m load average'),
        prometheus.new('$datasource', 'node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('15m load average'),
        prometheus.new('$datasource', 'count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config) + prometheus.withLegendFormat('logical cores'),
      ]),

    local memoryGraphPanelPrototype =
      timeSeriesPanel.new('Memory Usage')
      + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
      + tsStandardOptions.withUnit('bytes')
      + tsStandardOptions.withMin(0)
      + tsOptions.tooltip.withMode('multi')
      + tsCustom.withFillOpacity(10)
      + tsCustom.withShowPoints('never'),

    local memoryGraph =
      if platform == 'Linux' then
        memoryGraphPanelPrototype
        + tsCustom.stacking.withMode('normal')
        + tsQueryOptions.withTargets([
          prometheus.new(
            '$datasource',
            |||
              (
                node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
@@ -119,16 +134,19 @@ local table = grafana70.panel.table;
                node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
              )
            ||| % config,
          ) + prometheus.withLegendFormat('memory used'),
          prometheus.new('$datasource', 'node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory buffers'),
          prometheus.new('$datasource', 'node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory cached'),
          prometheus.new('$datasource', 'node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory free'),
        ])
      else if platform == 'Darwin' then
        // not useful to stack
        memoryGraphPanelPrototype
        + tsCustom.stacking.withMode('none')
        + tsQueryOptions.withTargets([
          prometheus.new('$datasource', 'node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Physical Memory'),
          prometheus.new(
            '$datasource',
            |||
              (
                node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
@@ -136,50 +154,61 @@ local table = grafana70.panel.table;
                node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} +
                node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
              )
            ||| % config
          ) + prometheus.withLegendFormat(
            'Memory Used'
          ),
          prometheus.new(
            '$datasource',
            |||
              (
                node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
                node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
              )
            ||| % config
          ) + prometheus.withLegendFormat(
            'App Memory'
          ),
          prometheus.new('$datasource', 'node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Wired Memory'),
          prometheus.new('$datasource', 'node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Compressed'),
        ])
      else if platform == 'AIX' then
        memoryGraphPanelPrototype
        + tsCustom.stacking.withMode('none')
        + tsQueryOptions.withTargets([
          prometheus.new('$datasource', 'node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Physical Memory'),
          prometheus.new(
            '$datasource',
            |||
              (
                node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
                node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
              )
            ||| % config
          ) + prometheus.withLegendFormat('Memory Used'),
        ]),

    // NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout.
    local memoryGaugePanelPrototype =
      gaugePanel.new('Memory Usage')
      + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
      + gaugePanel.standardOptions.thresholds.withSteps([
        gaugeStep.withColor('rgba(50, 172, 45, 0.97)'),
        gaugeStep.withColor('rgba(237, 129, 40, 0.89)') + gaugeStep.withValue(80),
        gaugeStep.withColor('rgba(245, 54, 54, 0.9)') + gaugeStep.withValue(90),
      ])
      + gaugePanel.standardOptions.withMax(100)
      + gaugePanel.standardOptions.withMin(0)
      + gaugePanel.standardOptions.withUnit('percent'),

    local memoryGauge =
      if platform == 'Linux' then
        memoryGaugePanelPrototype
        + gaugePanel.queryOptions.withTargets([
          prometheus.new(
            '$datasource',
            |||
              100 -
              (
@@ -188,11 +217,14 @@ local table = grafana70.panel.table;
                * 100
              )
            ||| % config,
          ),
        ])
      else if platform == 'Darwin' then
        memoryGaugePanelPrototype
        + gaugePanel.queryOptions.withTargets([
          prometheus.new(
            '$datasource',
            |||
              (
                (
@@ -206,10 +238,14 @@ local table = grafana70.panel.table;
              *
              100
            ||| % config
          ),
        ])
      else if platform == 'AIX' then
        memoryGaugePanelPrototype
        + gaugePanel.queryOptions.withTargets([
          prometheus.new(
            '$datasource',
            |||
              100 -
              (
@@ -218,156 +254,94 @@ local table = grafana70.panel.table;
                * 100
              )
            ||| % config
          ),
        ]),

    local diskIO =
      timeSeriesPanel.new('Disk I/O')
      + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
      + tsStandardOptions.withMin(0)
      + tsCustom.withFillOpacity(0)
      + tsCustom.withShowPoints('never')
      + tsOptions.tooltip.withMode('multi')
      + tsQueryOptions.withTargets([
        // TODO: Does it make sense to have those three in the same panel?
        prometheus.new('$datasource', 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config)
        + prometheus.withLegendFormat('{{device}} read')
        + prometheus.withIntervalFactor(1),
        prometheus.new('$datasource', 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config)
        + prometheus.withLegendFormat('{{device}} written')
        + prometheus.withIntervalFactor(1),
        prometheus.new('$datasource', 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config)
        + prometheus.withLegendFormat('{{device}} io time')
        + prometheus.withIntervalFactor(1),
      ])
      + tsStandardOptions.withOverrides(
        [
          tsStandardOptions.override.byRegexp.new('/ read| written/')
          + tsStandardOptions.override.byRegexp.withPropertiesFromOptions(
            tsStandardOptions.withUnit('Bps')
          ),
          tsStandardOptions.override.byRegexp.new('/ io time/')
          + tsStandardOptions.override.byRegexp.withPropertiesFromOptions(tsStandardOptions.withUnit('percentunit')),
        ]
      ),

    local diskSpaceUsage =
      table.new('Disk Space Usage')
      + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
      + table.standardOptions.withUnit('decbytes')
      + table.standardOptions.thresholds.withSteps(
        [
          tableStep.withColor('green'),
          tableStep.withColor('yellow') + gaugeStep.withValue(0.8),
          tableStep.withColor('red') + gaugeStep.withValue(0.9),
        ]
      )
      + table.queryOptions.withTargets([
        prometheus.new(
          '$datasource',
          |||
            max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s})
          ||| % config
        )
        + prometheus.withLegendFormat('')
        + prometheus.withInstant()
        + prometheus.withFormat('table'),
        prometheus.new(
          '$datasource',
          |||
            max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s})
          ||| % config
        )
        + prometheus.withLegendFormat('')
        + prometheus.withInstant()
        + prometheus.withFormat('table'),
      ])
      + table.standardOptions.withOverrides([
        tableOverride.byName.new('Mounted on')
        + tableOverride.byName.withProperty('custom.width', 260),
        tableOverride.byName.new('Size')
        + tableOverride.byName.withProperty('custom.width', 93),
        tableOverride.byName.new('Used')
        + tableOverride.byName.withProperty('custom.width', 72),
        tableOverride.byName.new('Available')
        + tableOverride.byName.withProperty('custom.width', 88),
        tableOverride.byName.new('Used, %')
        + tableOverride.byName.withProperty('unit', 'percentunit')
        + tableOverride.byName.withPropertiesFromOptions(
          table.fieldConfig.defaults.custom.withCellOptions(
            { type: 'gauge' },
          )
        )
        + tableOverride.byName.withProperty('max', 1)
        + tableOverride.byName.withProperty('min', 0),
      ])
      + table.queryOptions.withTransformations([
        tableTransformation.withId('groupBy')
        + tableTransformation.withOptions(
          {
            fields: {
              'Value #A': {
                aggregations: [
@@ -386,15 +360,12 @@ local table = grafana70.panel.table;
                operation: 'groupby',
              },
            },
          }
        ),
        tableTransformation.withId('merge'),
        tableTransformation.withId('calculateField')
        + tableTransformation.withOptions(
          {
            alias: 'Used',
            binary: {
              left: 'Value #A (lastNotNull)',
@@ -406,11 +377,11 @@ local table = grafana70.panel.table;
            reduce: {
              reducer: 'sum',
            },
          }
        ),
        tableTransformation.withId('calculateField')
        + tableTransformation.withOptions(
          {
            alias: 'Used, %',
            binary: {
              left: 'Used',
@@ -422,11 +393,11 @@ local table = grafana70.panel.table;
            reduce: {
              reducer: 'sum',
            },
          }
        ),
        tableTransformation.withId('organize')
        + tableTransformation.withOptions(
          {
            excludeByName: {},
            indexByName: {},
            renameByName: {
@@ -434,127 +405,131 @@ local table = grafana70.panel.table;
              'Value #B (lastNotNull)': 'Available',
              mountpoint: 'Mounted on',
            },
          }
        ),
        tableTransformation.withId('sortBy')
        + tableTransformation.withOptions(
          {
            fields: {},
            sort: [
              {
                field: 'Mounted on',
              },
            ],
          }
        ),
      ]),

    local networkReceived =
      timeSeriesPanel.new('Network Received')
      + timeSeriesPanel.panelOptions.withDescription('Network received (bits/s)')
      + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
      + tsStandardOptions.withUnit('bps')
      + tsStandardOptions.withMin(0)
      + tsCustom.withFillOpacity(0)
      + tsCustom.withShowPoints('never')
      + tsOptions.tooltip.withMode('multi')
      + tsQueryOptions.withTargets([
        prometheus.new('$datasource', 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config)
        + prometheus.withLegendFormat('{{device}}')
        + prometheus.withIntervalFactor(1),
      ]),

    local networkTransmitted =
      timeSeriesPanel.new('Network Transmitted')
      + timeSeriesPanel.panelOptions.withDescription('Network transmitted (bits/s)')
      + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
      + tsStandardOptions.withUnit('bps')
      + tsStandardOptions.withMin(0)
      + tsCustom.withFillOpacity(0)
      + tsOptions.tooltip.withMode('multi')
      + tsQueryOptions.withTargets([
        prometheus.new('$datasource', 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config)
        + prometheus.withLegendFormat('{{device}}')
        + prometheus.withIntervalFactor(1),
      ]),

    local cpuRow =
      row.new('CPU')
      + row.withPanels([
        idleCPU,
        systemLoad,
      ]),

    local memoryRow = [
      row.new('Memory') + row.gridPos.withY(8),
      memoryGraph + row.gridPos.withX(0) + row.gridPos.withY(9) + row.gridPos.withH(7) + row.gridPos.withW(18),
      memoryGauge + row.gridPos.withX(18) + row.gridPos.withY(9) + row.gridPos.withH(7) + row.gridPos.withW(6),
    ],

    local diskRow =
      row.new('Disk')
      + row.withPanels([
        diskIO,
        diskSpaceUsage,
      ]),

    local networkRow =
      row.new('Network')
      + row.withPanels([
        networkReceived,
        networkTransmitted,
      ]),

    local panels =
      grafana.util.grid.makeGrid([
        cpuRow,
      ], panelWidth=12, panelHeight=7)
      + memoryRow
      + grafana.util.grid.makeGrid([
        diskRow,
        networkRow,
      ], panelWidth=12, panelHeight=7, startY=18),

    local variables =
      [
        prometheusDatasourceVariable,
        clusterVariable,
        instanceVariable,
      ],

    dashboard: if platform == 'Linux' then
      dashboard.new(
        '%sNodes' % config.dashboardNamePrefix,
      )
      + dashboard.time.withFrom('now-1h')
      + dashboard.withTags(config.dashboardTags)
      + dashboard.withTimezone('utc')
      + dashboard.withRefresh('30s')
      + dashboard.withUid(std.md5(uid))
      + dashboard.graphTooltip.withSharedCrosshair()
      + dashboard.withVariables(variables)
      + dashboard.withPanels(panels)
    else if platform == 'Darwin' then
      dashboard.new(
        '%sMacOS' % config.dashboardNamePrefix,
      )
      + dashboard.time.withFrom('now-1h')
      + dashboard.withTags(config.dashboardTags)
      + dashboard.withTimezone('utc')
      + dashboard.withRefresh('30s')
      + dashboard.withUid(std.md5(uid))
      + dashboard.graphTooltip.withSharedCrosshair()
      + dashboard.withVariables(variables)
      + dashboard.withPanels(panels)
    else if platform == 'AIX' then
      dashboard.new(
        '%sAIX' % config.dashboardNamePrefix,
      )
      + dashboard.time.withFrom('now-1h')
      + dashboard.withTags(config.dashboardTags)
      + dashboard.withTimezone('utc')
      + dashboard.withRefresh('30s')
      + dashboard.withUid(std.md5(uid))
      + dashboard.graphTooltip.withSharedCrosshair()
      + dashboard.withVariables(variables)
      + dashboard.withPanels(panels),
  },
}
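One more API shift worth noting from the library above: threshold steps are no longer appended one call at a time (.addThresholdStep(color, value)) but passed as a list of step mixins. A stripped-down sketch of the memory gauge's thresholds as configured above, with the query omitted:

local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
local gaugePanel = grafana.panel.gauge;
local gaugeStep = gaugePanel.standardOptions.threshold.step;

gaugePanel.new('Memory Usage')
+ gaugePanel.standardOptions.withUnit('percent')
+ gaugePanel.standardOptions.withMin(0)
+ gaugePanel.standardOptions.withMax(100)
// The first step has no value, so it acts as the base colour.
+ gaugePanel.standardOptions.thresholds.withSteps([
  gaugeStep.withColor('rgba(50, 172, 45, 0.97)'),
  gaugeStep.withColor('rgba(237, 129, 40, 0.89)') + gaugeStep.withValue(80),
  gaugeStep.withColor('rgba(245, 54, 54, 0.9)') + gaugeStep.withValue(90),
])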