Fix up some of the USE metrics.

Signed-off-by: Tom Wilkie <tom.wilkie@gmail.com>
This commit is contained in:
Tom Wilkie 2018-05-10 11:35:48 +02:00 committed by Matthias Loibl
parent c34275d6e5
commit 642f67ffa1
No known key found for this signature in database
GPG key ID: B1C7DF661ABB2C1A
2 changed files with 21 additions and 36 deletions

View file

@ -45,7 +45,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
// Full utilisation would be all disks on each node spending an average of
// 1 sec per second doing I/O, normalize by node count for stacked charts
g.queryPanel(|||
instance:node_disk_utilisation:avg_irate / scalar(sum(up{%(nodeExporterSelector)s}))
instance:node_disk_utilisation:sum_irate / scalar(sum(up{%(nodeExporterSelector)s}))
||| % $._config, '{{instance}}', legendLink) +
g.stack +
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@ -53,7 +53,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
.addPanel(
g.panel('Disk IO Saturation') +
g.queryPanel(|||
instance:node_disk_saturation:avg_irate / scalar(sum(up{%(nodeExporterSelector)s}))
instance:node_disk_saturation:sum_irate / scalar(sum(up{%(nodeExporterSelector)s}))
||| % $._config, '{{instance}}', legendLink) +
g.stack +
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@ -104,7 +104,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
g.row('Memory')
.addPanel(
g.panel('Memory Utilisation') +
g.queryPanel('instance:node_memory_utilisation:{instance="$instance"}', 'Memory') +
g.queryPanel('instance:node_memory_utilisation:ratio{instance="$instance"}', 'Memory') +
{ yaxes: g.yaxes('percentunit') },
)
.addPanel(
@ -117,12 +117,12 @@ local g = import 'grafana-builder/grafana.libsonnet';
g.row('Disk')
.addPanel(
g.panel('Disk IO Utilisation') +
g.queryPanel('instance:node_disk_utilisation:avg_irate{instance="$instance"}', 'Utilisation') +
g.queryPanel('instance:node_disk_utilisation:sum_irate{instance="$instance"}', 'Utilisation') +
{ yaxes: g.yaxes('percentunit') },
)
.addPanel(
g.panel('Disk IO Saturation') +
g.queryPanel('instance:node_disk_saturation:avg_irate{instance="$instance"}', 'Saturation') +
g.queryPanel('instance:node_disk_saturation:sum_irate{instance="$instance"}', 'Saturation') +
{ yaxes: g.yaxes('percentunit') },
)
)

View file

@ -29,20 +29,9 @@
// Can go over 100%. >100% is bad.
record: 'instance:node_cpu_saturation_load1:',
expr: |||
sum by (instance) (
node_load1{%(nodeExporterSelector)s}
)
sum by (instance) (node_load1{%(nodeExporterSelector)s})
/
instance:node_num_cpu:sum
||| % $._config,
},
{
// Available memory per node
record: 'instance:node_memory_bytes_available:sum',
expr: |||
sum by (instance) (
(node_memory_MemFree{%(nodeExporterSelector)s} + node_memory_Cached{%(nodeExporterSelector)s} + node_memory_Buffers{%(nodeExporterSelector)s})
)
instance:node_num_cpu:sum
||| % $._config,
},
{
@ -58,17 +47,13 @@
// Memory utilisation per node, normalized by per-node memory
record: 'instance:node_memory_utilisation:ratio',
expr: |||
(instance:node_memory_bytes_total:sum - instance:node_memory_bytes_available:sum)
/
scalar(sum(instance:node_memory_bytes_total:sum))
1 - (
node_memory_MemAvailable{%(nodeExporterSelector)s}
/
node_memory_MemTotal{%(nodeExporterSelector)s}
)
||| % $._config,  // substitute %(nodeExporterSelector)s; without this the placeholder is emitted literally into the PromQL
},
{
record: 'instance:node_memory_utilisation:',
expr: |||
1 - (instance:node_memory_bytes_available:sum / instance:node_memory_bytes_total:sum)
||| % $._config,
},
{
record: 'instance:node_memory_swap_io_bytes:sum_rate',
expr: |||
@ -79,19 +64,19 @@
||| % $._config,
},
{
// Disk utilisation (ms spent, by rate() it's bound by 1 second)
record: 'instance:node_disk_utilisation:avg_irate',
// Disk utilisation (ms spent, 1 second irate())
record: 'instance:node_disk_utilisation:sum_irate',
expr: |||
avg by (instance) (
sum by (instance) (
irate(node_disk_io_time_ms{%(nodeExporterSelector)s,device=~"(sd|xvd).+"}[1m]) / 1e3
)
||| % $._config,
},
{
// Disk saturation (weighted I/O time in ms, taken as irate() and divided by 1e3)
record: 'instance:node_disk_saturation:avg_irate',
record: 'instance:node_disk_saturation:sum_irate',
expr: |||
avg by (instance) (
sum by (instance) (
irate(node_disk_io_time_weighted{%(nodeExporterSelector)s,device=~"(sd|xvd).+"}[1m]) / 1e3
)
||| % $._config,
@ -100,8 +85,8 @@
record: 'instance:node_net_utilisation:sum_irate',
expr: |||
sum by (instance) (
(irate(node_network_receive_bytes{%(nodeExporterSelector)s,device="eth0"}[1m]) +
irate(node_network_transmit_bytes{%(nodeExporterSelector)s,device="eth0"}[1m]))
(irate(node_network_receive_bytes{%(nodeExporterSelector)s,device=~"eth[0-9]+"}[1m]) +
irate(node_network_transmit_bytes{%(nodeExporterSelector)s,device=~"eth[0-9]+"}[1m]))
)
||| % $._config,
},
@ -109,8 +94,8 @@
record: 'instance:node_net_saturation:sum_irate',
expr: |||
sum by (instance) (
(irate(node_network_receive_drop{%(nodeExporterSelector)s,device="eth0"}[1m]) +
irate(node_network_transmit_drop{%(nodeExporterSelector)s,device="eth0"}[1m]))
(irate(node_network_receive_drop{%(nodeExporterSelector)s,device=~"eth[0-9]+"}[1m]) +
irate(node_network_transmit_drop{%(nodeExporterSelector)s,device=~"eth[0-9]+"}[1m]))
)
||| % $._config,
},