mirror of
https://github.com/prometheus/node_exporter.git
synced 2024-12-28 06:59:44 -08:00
Responses to review comments, round 3
Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
parent
3a770a0b1d
commit
706511a495
|
@ -5,12 +5,17 @@
|
||||||
// Select the metrics coming from the node exporter.
|
// Select the metrics coming from the node exporter.
|
||||||
nodeExporterSelector: 'job="node"',
|
nodeExporterSelector: 'job="node"',
|
||||||
|
|
||||||
// Select the fstype for filesystem-related queries.
|
// Select the fstype for filesystem-related queries. If left
|
||||||
// TODO: What is a good default selector here?
|
// empty, all filesystems are selected. If you have unusual
|
||||||
fsSelector: 'fstype=~"ext.|xfs|jfs|btrfs|vfat|ntfs"',
|
// filesystems you don't want to include in dashboards and
|
||||||
|
// alerting, you can exclude them here, e.g. 'fstype!="tmpfs"'.
|
||||||
|
fsSelector: '',
|
||||||
|
|
||||||
// Select the device for disk-related queries.
|
// Select the device for disk-related queries. If left empty, all
|
||||||
diskDeviceSelector: 'device=~"(sd|xvd).+"',
|
// devices are selected. If you have unusual devices you don't
|
||||||
|
// want to include in dashboards and alerting, you can exclude
|
||||||
|
// them here, e.g. 'device!="tmpfs"'.
|
||||||
|
diskDeviceSelector: '',
|
||||||
|
|
||||||
grafana_prefix: '',
|
grafana_prefix: '',
|
||||||
},
|
},
|
||||||
|
|
|
@ -22,7 +22,7 @@ local gauge = promgrafonnet.gauge;
|
||||||
.addTarget(prometheus.target(
|
.addTarget(prometheus.target(
|
||||||
// TODO: Consider using `${__interval}` as range and a 1m min step.
|
// TODO: Consider using `${__interval}` as range and a 1m min step.
|
||||||
|||
|
|||
|
||||||
1 - avg by (cpu) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}[1m]))
|
1 - rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}[1m])
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
legendFormat='{{cpu}}',
|
legendFormat='{{cpu}}',
|
||||||
intervalFactor=10,
|
intervalFactor=10,
|
||||||
|
@ -64,15 +64,18 @@ local gauge = promgrafonnet.gauge;
|
||||||
.addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory cached'))
|
.addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory cached'))
|
||||||
.addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory free'));
|
.addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory free'));
|
||||||
|
|
||||||
|
// TODO: It would be nicer to have a gauge that gets a 0-1 range and displays it as a percentage 0%-100%.
|
||||||
|
// This needs to be added upstream in the promgrafonnet library and then changed here.
|
||||||
local memoryGauge = gauge.new(
|
local memoryGauge = gauge.new(
|
||||||
'Memory Usage',
|
'Memory Usage',
|
||||||
|||
|
|||
|
||||||
|
100 -
|
||||||
(
|
(
|
||||||
node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"}
|
node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"}
|
||||||
/
|
/
|
||||||
node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}
|
node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}
|
||||||
)
|
|
||||||
* 100
|
* 100
|
||||||
|
)
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
).withLowerBeingBetter();
|
).withLowerBeingBetter();
|
||||||
|
|
||||||
|
@ -82,10 +85,11 @@ local gauge = promgrafonnet.gauge;
|
||||||
datasource='$datasource',
|
datasource='$datasource',
|
||||||
span=9,
|
span=9,
|
||||||
)
|
)
|
||||||
|
// TODO: Does it make sense to have those three in the same panel?
|
||||||
// TODO: Consider using `${__interval}` as range and a 1m min step.
|
// TODO: Consider using `${__interval}` as range and a 1m min step.
|
||||||
.addTarget(prometheus.target('sum by (instance, device) (rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s, instance="$instance"}[1m]))' % $._config, legendFormat='{{device}} read'))
|
.addTarget(prometheus.target('rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s, instance="$instance"}[1m])' % $._config, legendFormat='{{device}} read'))
|
||||||
.addTarget(prometheus.target('sum by (instance, device) (rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s, instance="$instance"}[1m]))' % $._config, legendFormat='{{device}} written'))
|
.addTarget(prometheus.target('rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s, instance="$instance"}[1m])' % $._config, legendFormat='{{device}} written'))
|
||||||
.addTarget(prometheus.target('sum by (instance, device) (rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s, instance="$instance"}[1m]))' % $._config, legendFormat='{{device}} io time')) +
|
.addTarget(prometheus.target('rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s, instance="$instance"}[1m])' % $._config, legendFormat='{{device}} io time')) +
|
||||||
{
|
{
|
||||||
seriesOverrides: [
|
seriesOverrides: [
|
||||||
{
|
{
|
||||||
|
@ -103,6 +107,8 @@ local gauge = promgrafonnet.gauge;
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// TODO: It would be nicer to have a gauge that gets a 0-1 range and displays it as a percentage 0%-100%.
|
||||||
|
// This needs to be added upstream in the promgrafonnet library and then changed here.
|
||||||
// TODO: Should this be partitioned by mountpoint?
|
// TODO: Should this be partitioned by mountpoint?
|
||||||
local diskSpaceUsage = gauge.new(
|
local diskSpaceUsage = gauge.new(
|
||||||
'Disk Space Usage',
|
'Disk Space Usage',
|
||||||
|
@ -158,7 +164,7 @@ local gauge = promgrafonnet.gauge;
|
||||||
template.new(
|
template.new(
|
||||||
'instance',
|
'instance',
|
||||||
'$datasource',
|
'$datasource',
|
||||||
'label_values(node_boot_time_seconds{%(nodeExporterSelector)s}, instance)' % $._config,
|
'label_values(node_exporter_build_info{%(nodeExporterSelector)s}, instance)' % $._config,
|
||||||
refresh='time',
|
refresh='time',
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
|
@ -12,13 +12,13 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
g.panel('CPU Utilisation') +
|
g.panel('CPU Utilisation') +
|
||||||
g.queryPanel(|||
|
g.queryPanel(|||
|
||||||
(
|
(
|
||||||
instance:node_cpu_utilisation:avg_rate1m
|
instance:node_cpu_utilisation:avg_rate1m{%(nodeExporterSelector)s}
|
||||||
*
|
*
|
||||||
instance:node_num_cpu:sum
|
instance:node_num_cpu:sum{%(nodeExporterSelector)s}
|
||||||
/ ignoring (instance) group_left
|
/ ignoring (instance) group_left
|
||||||
sum without (instance) (instance:node_num_cpu:sum)
|
sum without (instance) (instance:node_num_cpu:sum{%(nodeExporterSelector)s})
|
||||||
)
|
)
|
||||||
|||, '{{instance}}', legendLink) +
|
||| % $._config, '{{instance}}', legendLink) +
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
||||||
)
|
)
|
||||||
|
@ -27,11 +27,11 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
g.panel('CPU Saturation (load1 per CPU)') +
|
g.panel('CPU Saturation (load1 per CPU)') +
|
||||||
g.queryPanel(|||
|
g.queryPanel(|||
|
||||||
(
|
(
|
||||||
instance:node_load1_per_cpu:ratio
|
instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}
|
||||||
/ ignoring (instance) group_left
|
/ ignoring (instance) group_left
|
||||||
count without (instance) (instance:node_load1_per_cpu:ratio)
|
count without (instance) (instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})
|
||||||
)
|
)
|
||||||
|||, '{{instance}}', legendLink) +
|
||| % $._config, '{{instance}}', legendLink) +
|
||||||
g.stack +
|
g.stack +
|
||||||
// TODO: Does `max: 1` make sense? The stack can go over 1 in high-load scenarios.
|
// TODO: Does `max: 1` make sense? The stack can go over 1 in high-load scenarios.
|
||||||
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
||||||
|
@ -41,13 +41,13 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
g.row('Memory')
|
g.row('Memory')
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Memory Utilisation') +
|
g.panel('Memory Utilisation') +
|
||||||
g.queryPanel('instance:node_memory_utilisation:ratio', '{{instance}}', legendLink) +
|
g.queryPanel('instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) +
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
||||||
)
|
)
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Memory Saturation (Swap I/O)') +
|
g.panel('Memory Saturation (Swap I/O)') +
|
||||||
g.queryPanel('instance:node_memory_swap_io_bytes:sum_rate', '{{instance}}', legendLink) +
|
g.queryPanel('instance:node_memory_swap_io_bytes:sum_rate{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) +
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
|
@ -60,11 +60,11 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
// 1 second per second doing I/O, normalize by metric cardinality for stacked charts.
|
// 1 second per second doing I/O, normalize by metric cardinality for stacked charts.
|
||||||
g.queryPanel(|||
|
g.queryPanel(|||
|
||||||
(
|
(
|
||||||
instance:node_disk_io_time:sum_rate1m
|
instance:node_disk_io_time_seconds:sum_rate1m{%(nodeExporterSelector)s}
|
||||||
/ ignoring (instance) group_left
|
/ ignoring (instance) group_left
|
||||||
count without (instance) (instance:node_disk_io_time:sum_rate1m)
|
count without (instance) (instance:node_disk_io_time_seconds:sum_rate1m{%(nodeExporterSelector)s})
|
||||||
)
|
)
|
||||||
|||, '{{instance}}', legendLink) +
|
||| % $._config, '{{instance}}', legendLink) +
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
||||||
)
|
)
|
||||||
|
@ -72,11 +72,11 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
g.panel('Disk IO Saturation') +
|
g.panel('Disk IO Saturation') +
|
||||||
g.queryPanel(|||
|
g.queryPanel(|||
|
||||||
(
|
(
|
||||||
instance:node_disk_io_time_weighted:sum_rate1m
|
instance:node_disk_io_time_weighted_seconds:sum_rate1m{%(nodeExporterSelector)s}
|
||||||
/ ignoring (instance) group_left
|
/ ignoring (instance) group_left
|
||||||
count without (instance) (instance:node_disk_io_time_weighted:sum_rate1m)
|
count without (instance) (instance:node_disk_io_time_weighted_seconds:sum_rate1m{%(nodeExporterSelector)s})
|
||||||
)
|
)
|
||||||
|||, '{{instance}}', legendLink) +
|
||| % $._config, '{{instance}}', legendLink) +
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
|
||||||
)
|
)
|
||||||
|
@ -84,16 +84,30 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
.addRow(
|
.addRow(
|
||||||
g.row('Network')
|
g.row('Network')
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Net Utilisation (Transmitted)') +
|
g.panel('Net Utilisation (Bytes Receive/Transmit)') +
|
||||||
g.queryPanel('instance:node_net_utilisation:sum_irate', '{{instance}}', legendLink) +
|
g.queryPanel(
|
||||||
|
[
|
||||||
|
'instance:node_network_receive_bytes:sum_rate1m{%(nodeExporterSelector)s}' % $._config,
|
||||||
|
'-instance:node_network_transmit_bytes:sum_rate1m{%(nodeExporterSelector)s}' % $._config,
|
||||||
|
],
|
||||||
|
['{{instance}} Receive', '{{instance}} Transmit'],
|
||||||
|
legendLink,
|
||||||
|
) +
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
{ yaxes: g.yaxes('Bps') },
|
||||||
)
|
)
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Net Saturation (Dropped)') +
|
g.panel('Net Saturation (Drops Receive/Transmit)') +
|
||||||
g.queryPanel('instance:node_net_saturation:sum_irate', '{{instance}}', legendLink) +
|
g.queryPanel(
|
||||||
|
[
|
||||||
|
'instance:node_network_receive_drop:sum_rate1m{%(nodeExporterSelector)s}' % $._config,
|
||||||
|
'-instance:node_network_transmit_drop:sum_rate1m{%(nodeExporterSelector)s}' % $._config,
|
||||||
|
],
|
||||||
|
['{{instance}} Receive', '{{instance}} Transmit'],
|
||||||
|
legendLink,
|
||||||
|
) +
|
||||||
g.stack +
|
g.stack +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
{ yaxes: g.yaxes('rps') },
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
.addRow(
|
.addRow(
|
||||||
|
@ -127,12 +141,12 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
g.row('CPU')
|
g.row('CPU')
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('CPU Utilisation') +
|
g.panel('CPU Utilisation') +
|
||||||
g.queryPanel('instance:node_cpu_utilisation:avg_rate1m{instance="$instance"}', 'Utilisation') +
|
g.queryPanel('instance:node_cpu_utilisation:avg_rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') +
|
||||||
{ yaxes: g.yaxes('percentunit') },
|
{ yaxes: g.yaxes('percentunit') },
|
||||||
)
|
)
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('CPU Saturation (Load1)') +
|
g.panel('CPU Saturation (Load1)') +
|
||||||
g.queryPanel('instance:node_cpu_saturation_load1:{instance="$instance"}', 'Saturation') +
|
g.queryPanel('instance:node_cpu_saturation_load1:{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation') +
|
||||||
{ yaxes: g.yaxes('percentunit') },
|
{ yaxes: g.yaxes('percentunit') },
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -140,12 +154,12 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
g.row('Memory')
|
g.row('Memory')
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Memory Utilisation') +
|
g.panel('Memory Utilisation') +
|
||||||
g.queryPanel('instance:node_memory_utilisation:ratio{instance="$instance"}', 'Memory') +
|
g.queryPanel('instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Memory') +
|
||||||
{ yaxes: g.yaxes('percentunit') },
|
{ yaxes: g.yaxes('percentunit') },
|
||||||
)
|
)
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Memory Saturation (pages swapped per second)') +
|
g.panel('Memory Saturation (pages swapped per second)') +
|
||||||
g.queryPanel('instance:node_memory_swap_io_pages:rate1m{instance="$instance"}', 'Swap IO') +
|
g.queryPanel('instance:node_memory_swap_io_pages:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Swap IO') +
|
||||||
{ yaxes: g.yaxes('short') },
|
{ yaxes: g.yaxes('short') },
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -153,12 +167,12 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
g.row('Disk')
|
g.row('Disk')
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Disk IO Utilisation') +
|
g.panel('Disk IO Utilisation') +
|
||||||
g.queryPanel('instance:node_disk_io_time:sum_rate1m{instance="$instance"}', 'Utilisation') +
|
g.queryPanel('instance:node_disk_io_time_seconds:sum_rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') +
|
||||||
{ yaxes: g.yaxes('percentunit') },
|
{ yaxes: g.yaxes('percentunit') },
|
||||||
)
|
)
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Disk IO Saturation') +
|
g.panel('Disk IO Saturation') +
|
||||||
g.queryPanel('instance:node_disk_io_time_weighted:sum_rate1m{instance="$instance"}', 'Saturation') +
|
g.queryPanel('instance:node_disk_io_time_weighted_seconds:sum_rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation') +
|
||||||
{ yaxes: g.yaxes('percentunit') },
|
{ yaxes: g.yaxes('percentunit') },
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -167,7 +181,10 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Net Utilisation (Bytes Receive/Transmit)') +
|
g.panel('Net Utilisation (Bytes Receive/Transmit)') +
|
||||||
g.queryPanel(
|
g.queryPanel(
|
||||||
['node_network_receive_bytes_total{instance="$instance"}', '-node_network_transmit_bytes_total{instance="$instance"}'],
|
[
|
||||||
|
'instance:node_network_receive_bytes:sum_rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
|
||||||
|
'-instance:node_network_transmit_bytes:sum_rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
|
||||||
|
],
|
||||||
['Receive', 'Transmit'],
|
['Receive', 'Transmit'],
|
||||||
) +
|
) +
|
||||||
{ yaxes: g.yaxes('Bps') },
|
{ yaxes: g.yaxes('Bps') },
|
||||||
|
@ -175,7 +192,10 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
.addPanel(
|
.addPanel(
|
||||||
g.panel('Net Saturation (Drops Receive/Transmit)') +
|
g.panel('Net Saturation (Drops Receive/Transmit)') +
|
||||||
g.queryPanel(
|
g.queryPanel(
|
||||||
['node_network_receive_drop_total{instance="$instance"}', '-node_network_transmit_drop_total{instance="$instance"}'],
|
[
|
||||||
|
'instance:node_network_receive_drop:sum_rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
|
||||||
|
'-instance:node_network_transmit_drop:sum_rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
|
||||||
|
],
|
||||||
['Receive drops', 'Transmit drops'],
|
['Receive drops', 'Transmit drops'],
|
||||||
) +
|
) +
|
||||||
{ yaxes: g.yaxes('rps') },
|
{ yaxes: g.yaxes('rps') },
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
record: 'instance:node_num_cpu:sum',
|
record: 'instance:node_num_cpu:sum',
|
||||||
expr: |||
|
expr: |||
|
||||||
count without (cpu) (
|
count without (cpu) (
|
||||||
sum without (mode) (
|
count without (mode) (
|
||||||
node_cpu_seconds_total{%(nodeExporterSelector)s}
|
node_cpu_seconds_total{%(nodeExporterSelector)s}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -26,7 +26,9 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
// This is CPU saturation: 1min avg run queue length / number of CPUs.
|
// This is CPU saturation: 1min avg run queue length / number of CPUs.
|
||||||
// Can go over 1. >1 is bad.
|
// Can go over 1.
|
||||||
|
// TODO: There are situations where a run queue >1/core is just normal and fine.
|
||||||
|
// We need to clarify how to read this metric and if its usage is helpful at all.
|
||||||
record: 'instance:node_load1_per_cpu:ratio',
|
record: 'instance:node_load1_per_cpu:ratio',
|
||||||
expr: |||
|
expr: |||
|
||||||
(
|
(
|
||||||
|
@ -59,7 +61,9 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
// Disk utilisation (seconds spent, 1 second rate)
|
// Disk utilisation (seconds spent, 1 second rate)
|
||||||
record: 'instance:node_disk_io_time:sum_rate1m',
|
// TODO: This should probably not aggregate over all devices but
|
||||||
|
// keep them separate.
|
||||||
|
record: 'instance:node_disk_io_time_seconds:sum_rate1m',
|
||||||
expr: |||
|
expr: |||
|
||||||
sum without (device) (
|
sum without (device) (
|
||||||
rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m])
|
rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m])
|
||||||
|
@ -68,7 +72,9 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
// Disk saturation (weighted seconds spent, 1 second rate)
|
// Disk saturation (weighted seconds spent, 1 second rate)
|
||||||
record: 'instance:node_disk_io_time_weighted:sum_rate1m',
|
// TODO: This should probably not aggregate over all devices but
|
||||||
|
// keep them separate.
|
||||||
|
record: 'instance:node_disk_io_time_weighted_seconds:sum_rate1m',
|
||||||
expr: |||
|
expr: |||
|
||||||
sum without (device) (
|
sum without (device) (
|
||||||
rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m])
|
rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m])
|
||||||
|
@ -93,6 +99,7 @@
|
||||||
)
|
)
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
},
|
},
|
||||||
|
// TODO: Find out if those drops ever happen on modern switched networks.
|
||||||
{
|
{
|
||||||
record: 'instance:node_network_receive_drop:sum_rate1m',
|
record: 'instance:node_network_receive_drop:sum_rate1m',
|
||||||
expr: |||
|
expr: |||
|
||||||
|
|
Loading…
Reference in a new issue