mirror of
				https://github.com/prometheus/node_exporter.git
				synced 2025-08-20 18:33:52 -07:00 
			
		
		
		
	This should be the way forward when importing libraries in jsonnet. It's closer to how Go imports look and makes it more obvious where packages live. This is not breaking anything, as the old imports were already symlinks to the now directly used directories. Signed-off-by: Matthias Loibl <mail@matthiasloibl.com>
		
			
				
	
	
		
			276 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			276 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet';
 | |
| 
 | |
| {
 | |
|   grafanaDashboards+:: {
 | |
|     'node-cluster-rsrc-use.json':
 | |
|       local legendLink = '%s/dashboard/file/node-rsrc-use.json' % $._config.grafana_prefix;
 | |
| 
 | |
|       g.dashboard('USE Method / Cluster')
 | |
|       .addRow(
 | |
|         g.row('CPU')
 | |
|         .addPanel(
 | |
|           g.panel('CPU Utilisation') +
 | |
|           g.queryPanel(|||
 | |
|             (
 | |
|               instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s}
 | |
|             *
 | |
|               instance:node_num_cpu:sum{%(nodeExporterSelector)s}
 | |
|             )
 | |
|             / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s}))
 | |
|           ||| % $._config, '{{instance}}', legendLink) +
 | |
|           g.stack +
 | |
|           { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
 | |
|         )
 | |
|         .addPanel(
 | |
|           // TODO: Is this a useful panel? At least there should be some explanation how load
 | |
|           // average relates to the "CPU saturation" in the title.
 | |
|           g.panel('CPU Saturation (load1 per CPU)') +
 | |
|           g.queryPanel(|||
 | |
|             instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}
 | |
|             / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}))
 | |
|           ||| % $._config, '{{instance}}', legendLink) +
 | |
|           g.stack +
 | |
|           // TODO: Does `max: 1` make sense? The stack can go over 1 in high-load scenarios.
 | |
|           { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
 | |
|         )
 | |
|       )
 | |
|       .addRow(
 | |
|         g.row('Memory')
 | |
|         .addPanel(
 | |
|           g.panel('Memory Utilisation') +
 | |
|           g.queryPanel(|||
 | |
|             instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}
 | |
|             / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}))
 | |
|           ||| % $._config, '{{instance}}', legendLink) +
 | |
|           g.stack +
 | |
|           { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
 | |
|         )
 | |
|         .addPanel(
 | |
|           g.panel('Memory Saturation (Major Page Faults)') +
 | |
|           g.queryPanel('instance:node_vmstat_pgmajfault:rate1m{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) +
 | |
|           g.stack +
 | |
|           { yaxes: g.yaxes('rps') },
 | |
|         )
 | |
|       )
 | |
|       .addRow(
 | |
|         g.row('Network')
 | |
|         .addPanel(
 | |
|           g.panel('Net Utilisation (Bytes Receive/Transmit)') +
 | |
|           g.queryPanel(
 | |
|             [
 | |
|               'instance:node_network_receive_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config,
 | |
|               'instance:node_network_transmit_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config,
 | |
|             ],
 | |
|             ['{{instance}} Receive', '{{instance}} Transmit'],
 | |
|             legendLink,
 | |
|           ) +
 | |
|           g.stack +
 | |
|           {
 | |
|             yaxes: g.yaxes({ format: 'Bps', min: null }),
 | |
|             seriesOverrides: [
 | |
|               {
 | |
|                 alias: '/ Receive/',
 | |
|                 stack: 'A',
 | |
|               },
 | |
|               {
 | |
|                 alias: '/ Transmit/',
 | |
|                 stack: 'B',
 | |
|                 transform: 'negative-Y',
 | |
|               },
 | |
|             ],
 | |
|           },
 | |
|         )
 | |
|         .addPanel(
 | |
|           g.panel('Net Saturation (Drops Receive/Transmit)') +
 | |
|           g.queryPanel(
 | |
|             [
 | |
|               'instance:node_network_receive_drop_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config,
 | |
|               'instance:node_network_transmit_drop_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config,
 | |
|             ],
 | |
|             ['{{instance}} Receive', '{{instance}} Transmit'],
 | |
|             legendLink,
 | |
|           ) +
 | |
|           g.stack +
 | |
|           {
 | |
|             yaxes: g.yaxes({ format: 'rps', min: null }),
 | |
|             seriesOverrides: [
 | |
|               {
 | |
|                 alias: '/ Receive/',
 | |
|                 stack: 'A',
 | |
|               },
 | |
|               {
 | |
|                 alias: '/ Transmit/',
 | |
|                 stack: 'B',
 | |
|                 transform: 'negative-Y',
 | |
|               },
 | |
|             ],
 | |
|           },
 | |
|         )
 | |
|       )
 | |
|       .addRow(
 | |
|         g.row('Disk IO')
 | |
|         .addPanel(
 | |
|           g.panel('Disk IO Utilisation') +
 | |
|           // Full utilisation would be all disks on each node spending an average of
 | |
|           // 1 second per second doing I/O, normalize by metric cardinality for stacked charts.
 | |
|           // TODO: Does the partition by device make sense? Using the most utilized device per
 | |
|           // instance might make more sense.
 | |
|           g.queryPanel(|||
 | |
|             instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s}
 | |
|             / scalar(count(instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s}))
 | |
|           ||| % $._config, '{{instance}} {{device}}', legendLink) +
 | |
|           g.stack +
 | |
|           { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
 | |
|         )
 | |
|         .addPanel(
 | |
|           g.panel('Disk IO Saturation') +
 | |
|           g.queryPanel(|||
 | |
|             instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s}
 | |
|             / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s}))
 | |
|           ||| % $._config, '{{instance}} {{device}}', legendLink) +
 | |
|           g.stack +
 | |
|           { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
 | |
|         )
 | |
|       )
 | |
|       .addRow(
 | |
|         g.row('Disk Space')
 | |
|         .addPanel(
 | |
|           g.panel('Disk Space Utilisation') +
 | |
|           g.queryPanel(|||
 | |
|             sum without (device) (
 | |
|               max without (fstype, mountpoint) (
 | |
|                 node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s}
 | |
|               )
 | |
|             ) 
 | |
|             / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s})))
 | |
|           ||| % $._config, '{{instance}}', legendLink) +
 | |
|           g.stack +
 | |
|           { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
 | |
|         ),
 | |
|       ),
 | |
| 
 | |
|     'node-rsrc-use.json':
 | |
|       g.dashboard('USE Method / Node')
 | |
|       .addTemplate('instance', 'up{%(nodeExporterSelector)s}' % $._config, 'instance')
 | |
|       .addRow(
 | |
|         g.row('CPU')
 | |
|         .addPanel(
 | |
|           g.panel('CPU Utilisation') +
 | |
|           g.queryPanel('instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') +
 | |
|           {
 | |
|             yaxes: g.yaxes('percentunit'),
 | |
|             legend+: { show: false },
 | |
|           },
 | |
|         )
 | |
|         .addPanel(
 | |
|           // TODO: Is this a useful panel? At least there should be some explanation how load
 | |
|           // average relates to the "CPU saturation" in the title.
 | |
|           g.panel('CPU Saturation (Load1 per CPU)') +
 | |
|           g.queryPanel('instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation') +
 | |
|           {
 | |
|             yaxes: g.yaxes('percentunit'),
 | |
|             legend+: { show: false },
 | |
|           },
 | |
|         )
 | |
|       )
 | |
|       .addRow(
 | |
|         g.row('Memory')
 | |
|         .addPanel(
 | |
|           g.panel('Memory Utilisation') +
 | |
|           g.queryPanel('instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Memory') +
 | |
|           { yaxes: g.yaxes('percentunit') },
 | |
|         )
 | |
|         .addPanel(
 | |
|           g.panel('Memory Saturation (Major Page Faults)') +
 | |
|           g.queryPanel('instance:node_vmstat_pgmajfault:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Major page faults') +
 | |
|           {
 | |
|             yaxes: g.yaxes('short'),
 | |
|             legend+: { show: false },
 | |
|           },
 | |
|         )
 | |
|       )
 | |
|       .addRow(
 | |
|         g.row('Net')
 | |
|         .addPanel(
 | |
|           g.panel('Net Utilisation (Bytes Receive/Transmit)') +
 | |
|           g.queryPanel(
 | |
|             [
 | |
|               'instance:node_network_receive_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
 | |
|               'instance:node_network_transmit_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
 | |
|             ],
 | |
|             ['Receive', 'Transmit'],
 | |
|           ) +
 | |
|           {
 | |
|             yaxes: g.yaxes({ format: 'Bps', min: null }),
 | |
|             seriesOverrides: [
 | |
|               {
 | |
|                 alias: '/Receive/',
 | |
|                 stack: 'A',
 | |
|               },
 | |
|               {
 | |
|                 alias: '/Transmit/',
 | |
|                 stack: 'B',
 | |
|                 transform: 'negative-Y',
 | |
|               },
 | |
|             ],
 | |
|           },
 | |
|         )
 | |
|         .addPanel(
 | |
|           g.panel('Net Saturation (Drops Receive/Transmit)') +
 | |
|           g.queryPanel(
 | |
|             [
 | |
|               'instance:node_network_receive_drop_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
 | |
|               'instance:node_network_transmit_drop_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
 | |
|             ],
 | |
|             ['Receive drops', 'Transmit drops'],
 | |
|           ) +
 | |
|           {
 | |
|             yaxes: g.yaxes({ format: 'rps', min: null }),
 | |
|             seriesOverrides: [
 | |
|               {
 | |
|                 alias: '/Receive/',
 | |
|                 stack: 'A',
 | |
|               },
 | |
|               {
 | |
|                 alias: '/Transmit/',
 | |
|                 stack: 'B',
 | |
|                 transform: 'negative-Y',
 | |
|               },
 | |
|             ],
 | |
|           },
 | |
|         )
 | |
|       )
 | |
|       .addRow(
 | |
|         g.row('Disk IO')
 | |
|         .addPanel(
 | |
|           g.panel('Disk IO Utilisation') +
 | |
|           g.queryPanel('instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') +
 | |
|           { yaxes: g.yaxes('percentunit') },
 | |
|         )
 | |
|         .addPanel(
 | |
|           g.panel('Disk IO Saturation') +
 | |
|           g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') +
 | |
|           { yaxes: g.yaxes('percentunit') },
 | |
|         )
 | |
|       )
 | |
|       .addRow(
 | |
|         g.row('Disk Space')
 | |
|         .addPanel(
 | |
|           g.panel('Disk Space Utilisation') +
 | |
|           g.queryPanel(|||
 | |
|             1 -
 | |
|             (
 | |
|               max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, instance="$instance"})
 | |
|             /
 | |
|               max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, instance="$instance"})
 | |
|             )
 | |
|           ||| % $._config, '{{device}}') +
 | |
|           {
 | |
|             yaxes: g.yaxes('percentunit'),
 | |
|             legend+: { show: false },
 | |
|           },
 | |
|         ),
 | |
|       ),
 | |
|   },
 | |
| }
 |