Beginnings of a node-exporter monitoring mixin.

Signed-off-by: Tom Wilkie <tom.wilkie@gmail.com>
This commit is contained in:
Tom Wilkie 2018-05-08 12:10:29 +02:00 committed by Matthias Loibl
parent 17fee8081f
commit bafe1707f1
No known key found for this signature in database
GPG key ID: B1C7DF661ABB2C1A
11 changed files with 767 additions and 0 deletions

View file

@ -0,0 +1,165 @@
// Alerting rules for hosts scraped through node-exporter.
//
// Selectors are rendered from $._config (nodeExporterSelector, fsSelectors), so
// the rules follow whatever job/filesystem matchers the consumer configures.
// Filesystem rules come in warning/critical pairs that share an alert name and
// differ only in threshold and `severity` label; every filesystem rule ignores
// read-only filesystems.
{
  prometheusAlerts+:: {
    groups+: [
      {
        name: 'node',
        rules: [
          // Space: the 6h linear trend reaches zero within 24h and less than
          // 40% is currently available.
          {
            alert: 'NodeFilesystemSpaceFillingUp',
            expr: |||
              predict_linear(node_filesystem_avail{%(nodeExporterSelector)s,%(fsSelectors)s}[6h], 24*60*60) < 0
              AND
              node_filesystem_avail{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_size{%(nodeExporterSelector)s,%(fsSelectors)s} < 0.4
              AND
              node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} is predicted to run out of space within the next 24 hours.',
            },
          },
          // Space: the 6h linear trend reaches zero within 4h and less than
          // 20% is currently available.
          {
            alert: 'NodeFilesystemSpaceFillingUp',
            expr: |||
              predict_linear(node_filesystem_avail{%(nodeExporterSelector)s,%(fsSelectors)s}[6h], 4*60*60) < 0
              AND
              node_filesystem_avail{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_size{%(nodeExporterSelector)s,%(fsSelectors)s} < 0.2
              AND
              node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'critical',
            },
            annotations: {
              message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} is predicted to run out of space within the next 4 hours.',
            },
          },
          // Space: less than 5% available.
          {
            alert: 'NodeFilesystemOutOfSpace',
            expr: |||
              node_filesystem_avail{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_size{%(nodeExporterSelector)s,%(fsSelectors)s} * 100 < 5
              AND
              node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ $value }}% available space left.',
            },
          },
          // Space: less than 3% available.
          {
            alert: 'NodeFilesystemOutOfSpace',
            expr: |||
              node_filesystem_avail{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_size{%(nodeExporterSelector)s,%(fsSelectors)s} * 100 < 3
              AND
              node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'critical',
            },
            annotations: {
              message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ $value }}% available space left.',
            },
          },
          // Inodes: the 6h linear trend reaches zero within 24h and less than
          // 40% of the inodes are currently free.
          {
            alert: 'NodeFilesystemFilesFillingUp',
            expr: |||
              predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelectors)s}[6h], 24*60*60) < 0
              AND
              node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelectors)s} < 0.4
              AND
              node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} is predicted to run out of files within the next 24 hours.',
            },
          },
          // Inodes: the 6h linear trend reaches zero within 4h and less than
          // 20% of the inodes are currently free.
          {
            alert: 'NodeFilesystemFilesFillingUp',
            expr: |||
              predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelectors)s}[6h], 4*60*60) < 0
              AND
              node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelectors)s} < 0.2
              AND
              node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
            ||| % $._config,
            'for': '1h',
            labels: {
              // The tighter 4h/20% variant must escalate, mirroring the space
              // pair; with 'warning' it would be indistinguishable from the
              // 24h rule above.
              severity: 'critical',
            },
            annotations: {
              message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} is predicted to run out of files within the next 4 hours.',
            },
          },
          // Inodes: less than 5% free.
          {
            alert: 'NodeFilesystemOutOfFiles',
            expr: |||
              node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelectors)s} * 100 < 5
              AND
              node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'warning',
            },
            annotations: {
              message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ $value }}% available inodes left.',
            },
          },
          // Inodes: less than 3% free. The expression measures inodes, so the
          // alert is named and worded accordingly rather than reusing the
          // NodeFilesystemOutOfSpace name and message.
          {
            alert: 'NodeFilesystemOutOfFiles',
            expr: |||
              node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelectors)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelectors)s} * 100 < 3
              AND
              node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelectors)s} == 0
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'critical',
            },
            annotations: {
              message: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ $value }}% available inodes left.',
            },
          },
          // Network: more than 10 receive errors within two minutes.
          // Scoped to the configured node-exporter job like every other rule.
          {
            alert: 'NodeNetworkReceiveErrs',
            expr: |||
              increase(node_network_receive_errs{%(nodeExporterSelector)s}[2m]) > 10
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'critical',
            },
            annotations: {
              message: '{{ $labels.instance }} interface {{ $labels.device }} shows errors while receiving packets ({{ $value }} errors in two minutes).',
            },
          },
          // Network: more than 10 transmit errors within two minutes.
          {
            alert: 'NodeNetworkTransmitErrs',
            expr: |||
              increase(node_network_transmit_errs{%(nodeExporterSelector)s}[2m]) > 10
            ||| % $._config,
            'for': '1h',
            labels: {
              severity: 'critical',
            },
            annotations: {
              message: '{{ $labels.instance }} interface {{ $labels.device }} shows errors while transmitting packets ({{ $value }} errors in two minutes).',
            },
          },
        ],
      },
    ],
  },
}

View file

@ -0,0 +1,11 @@
// Default settings for the mixin. `_config+::` merges into any `_config`
// already present on the object this file is added to.
{
  _config+:: {
    // Prefix placed in front of dashboard link paths; empty by default.
    grafana_prefix: '',

    // Label matchers spliced between the {} of Prometheus selectors to pick
    // the node-exporter series.
    nodeExporterSelector: 'job="node-exporter"',

    // Filesystem matchers, factored out because they repeat in many queries
    // and are a likely customization point.
    fsSelectors: 'fstype=~"ext.|xfs",mountpoint!="/var/lib/docker/aufs"',
  },
}

View file

@ -0,0 +1,2 @@
// Merges the two dashboard definition files into a single object.
local nodeDashboards = import 'node.libsonnet';
local useDashboards = import 'use.libsonnet';

nodeDashboards + useDashboards

View file

@ -0,0 +1,176 @@
local grafana = import 'grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local promgrafonnet = import '../lib/promgrafonnet/promgrafonnet.libsonnet';
local gauge = promgrafonnet.gauge;

// The gauge helper hard-codes its own datasource name. This override makes the
// gauge panels follow the dashboard's `$datasource` template variable like the
// graph panels do.
local followDashboardDatasource = { datasource: '$datasource' };

// "Nodes" dashboard: CPU, load, memory, disk and network panels for the host
// selected through the `$instance` template variable. All queries are scoped
// with $._config.nodeExporterSelector.
{
  grafanaDashboards+:: {
    'nodes.json':
      // The query is 100 minus the idle percentage per core, i.e. CPU usage,
      // so the panel is titled accordingly.
      local cpuUsage =
        graphPanel.new(
          'CPU Usage',
          datasource='$datasource',
          span=6,
          format='percent',
          max=100,
          min=0,
        )
        .addTarget(prometheus.target(
          |||
            100 - (avg by (cpu) (irate(node_cpu{%(nodeExporterSelector)s, mode="idle", instance="$instance"}[5m])) * 100)
          ||| % $._config,
          legendFormat='{{cpu}}',
          intervalFactor=10,
        ));

      // Load averages multiplied by 100 and rendered with the percent format.
      local systemLoad =
        graphPanel.new(
          'System load',
          datasource='$datasource',
          span=6,
          format='percent',
        )
        .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"} * 100' % $._config, legendFormat='load 1m'))
        .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"} * 100' % $._config, legendFormat='load 5m'))
        .addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance"} * 100' % $._config, legendFormat='load 15m'));

      // Used memory is total minus free, buffers and cache; the three
      // subtracted components are also plotted individually.
      local memoryGraph =
        graphPanel.new(
          'Memory Usage',
          datasource='$datasource',
          span=9,
          format='bytes',
        )
        .addTarget(prometheus.target(
          |||
            node_memory_MemTotal{%(nodeExporterSelector)s, instance="$instance"}
            - node_memory_MemFree{%(nodeExporterSelector)s, instance="$instance"}
            - node_memory_Buffers{%(nodeExporterSelector)s, instance="$instance"}
            - node_memory_Cached{%(nodeExporterSelector)s, instance="$instance"}
          ||| % $._config, legendFormat='memory used'
        ))
        .addTarget(prometheus.target('node_memory_Buffers{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory buffers'))
        .addTarget(prometheus.target('node_memory_Cached{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory cached'))
        .addTarget(prometheus.target('node_memory_MemFree{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory free'));

      // Same "used" definition as the graph, as a percentage of total memory.
      local memoryGauge =
        gauge.new(
          'Memory Usage',
          |||
            (
              node_memory_MemTotal{%(nodeExporterSelector)s, instance="$instance"}
              - node_memory_MemFree{%(nodeExporterSelector)s, instance="$instance"}
              - node_memory_Buffers{%(nodeExporterSelector)s, instance="$instance"}
              - node_memory_Cached{%(nodeExporterSelector)s, instance="$instance"}
            ) * 100
              /
            node_memory_MemTotal{%(nodeExporterSelector)s, instance="$instance"}
          ||| % $._config,
        ).withLowerBeingBetter()
        + followDashboardDatasource;

      // Read/written throughput on the left axis (bytes), I/O time on the
      // right axis (ms).
      local diskIO =
        graphPanel.new(
          'Disk I/O',
          datasource='$datasource',
          span=9,
        )
        .addTarget(prometheus.target('sum by (instance) (rate(node_disk_bytes_read{%(nodeExporterSelector)s, instance="$instance"}[2m]))' % $._config, legendFormat='read'))
        .addTarget(prometheus.target('sum by (instance) (rate(node_disk_bytes_written{%(nodeExporterSelector)s, instance="$instance"}[2m]))' % $._config, legendFormat='written'))
        .addTarget(prometheus.target('sum by (instance) (rate(node_disk_io_time_ms{%(nodeExporterSelector)s, instance="$instance"}[2m]))' % $._config, legendFormat='io time')) +
        {
          seriesOverrides: [
            {
              alias: 'read',
              yaxis: 1,
            },
            {
              alias: 'io time',
              yaxis: 2,
            },
          ],
          yaxes: [
            self.yaxe(format='bytes'),
            self.yaxe(format='ms'),
          ],
        };

      // Percentage of filesystem capacity in use, summed over every device
      // except "rootfs".
      local diskSpaceUsage =
        gauge.new(
          'Disk Space Usage',
          |||
            (
              sum(node_filesystem_size{%(nodeExporterSelector)s, device!="rootfs", instance="$instance"})
              - sum(node_filesystem_free{%(nodeExporterSelector)s, device!="rootfs", instance="$instance"})
            ) * 100
              /
            sum(node_filesystem_size{%(nodeExporterSelector)s, device!="rootfs", instance="$instance"})
          ||| % $._config,
        ).withLowerBeingBetter()
        + followDashboardDatasource;

      // Per-device receive rate, loopback excluded.
      local networkReceived =
        graphPanel.new(
          'Network Received',
          datasource='$datasource',
          span=6,
          format='bytes',
        )
        .addTarget(prometheus.target('rate(node_network_receive_bytes{%(nodeExporterSelector)s, instance="$instance", device!~"lo"}[5m])' % $._config, legendFormat='{{device}}'));

      // Per-device transmit rate, loopback excluded.
      local networkTransmitted =
        graphPanel.new(
          'Network Transmitted',
          datasource='$datasource',
          span=6,
          format='bytes',
        )
        .addTarget(prometheus.target('rate(node_network_transmit_bytes{%(nodeExporterSelector)s, instance="$instance", device!~"lo"}[5m])' % $._config, legendFormat='{{device}}'));

      dashboard.new('Nodes', time_from='now-1h')
      // Datasource picker; panels reference it as `$datasource`.
      .addTemplate(
        {
          current: {
            text: 'Prometheus',
            value: 'Prometheus',
          },
          hide: 0,
          label: null,
          name: 'datasource',
          options: [],
          query: 'prometheus',
          refresh: 1,
          regex: '',
          type: 'datasource',
        },
      )
      // Instance picker, populated from the `instance` label of node_boot_time.
      .addTemplate(
        template.new(
          'instance',
          '$datasource',
          'label_values(node_boot_time{%(nodeExporterSelector)s}, instance)' % $._config,
          refresh='time',
        )
      )
      .addRow(
        row.new()
        .addPanel(cpuUsage)
        .addPanel(systemLoad)
      )
      .addRow(
        row.new()
        .addPanel(memoryGraph)
        .addPanel(memoryGauge)
      )
      .addRow(
        row.new()
        .addPanel(diskIO)
        .addPanel(diskSpaceUsage)
      )
      .addRow(
        row.new()
        .addPanel(networkReceived)
        .addPanel(networkTransmitted)
      ),
  },
}

View file

@ -0,0 +1,151 @@
local g = import 'grafana-builder/grafana.libsonnet';

// USE-method (utilisation / saturation) dashboards built on the
// `instance:node_*` recording rules:
//   * 'node-cluster-rsrc-use.json' stacks every instance in one view; each
//     legend entry links to the per-node dashboard.
//   * 'k8s-node-rsrc-use.json' shows the same signals for the host selected
//     through the `$instance` template variable.
{
  grafanaDashboards+:: {
    'node-cluster-rsrc-use.json':
      // Target of the legend links on the cluster dashboard.
      local legendLink = '%s/dashboard/file/k8s-node-rsrc-use.json' % $._config.grafana_prefix;

      g.dashboard('USE Method / Cluster')
      .addRow(
        g.row('CPU')
        .addPanel(
          g.panel('CPU Utilisation') +
          g.queryPanel('instance:node_cpu_utilisation:avg1m * instance:node_num_cpu:sum / scalar(sum(instance:node_num_cpu:sum))', '{{instance}}', legendLink) +
          g.stack +
          { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
        )
        .addPanel(
          g.panel('CPU Saturation (Load1)') +
          g.queryPanel(|||
            instance:node_cpu_saturation_load1: / scalar(sum(up{%(nodeExporterSelector)s}))
          ||| % $._config, '{{instance}}', legendLink) +
          g.stack +
          { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
        )
      )
      .addRow(
        g.row('Memory')
        .addPanel(
          g.panel('Memory Utilisation') +
          g.queryPanel('instance:node_memory_utilisation:ratio', '{{instance}}', legendLink) +
          g.stack +
          { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
        )
        .addPanel(
          g.panel('Memory Saturation (Swap I/O)') +
          g.queryPanel('instance:node_memory_swap_io_bytes:sum_rate', '{{instance}}', legendLink) +
          g.stack +
          { yaxes: g.yaxes('Bps') },
        )
      )
      .addRow(
        g.row('Disk')
        .addPanel(
          g.panel('Disk IO Utilisation') +
          // Full utilisation would be all disks on each node spending an average of
          // 1 sec per second doing I/O, normalize by node count for stacked charts
          g.queryPanel(|||
            instance:node_disk_utilisation:avg_irate / scalar(sum(up{%(nodeExporterSelector)s}))
          ||| % $._config, '{{instance}}', legendLink) +
          g.stack +
          { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
        )
        .addPanel(
          g.panel('Disk IO Saturation') +
          g.queryPanel(|||
            instance:node_disk_saturation:avg_irate / scalar(sum(up{%(nodeExporterSelector)s}))
          ||| % $._config, '{{instance}}', legendLink) +
          g.stack +
          { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
        )
      )
      .addRow(
        g.row('Network')
        .addPanel(
          g.panel('Net Utilisation (Transmitted)') +
          g.queryPanel('instance:node_net_utilisation:sum_irate', '{{instance}}', legendLink) +
          g.stack +
          { yaxes: g.yaxes('Bps') },
        )
        .addPanel(
          g.panel('Net Saturation (Dropped)') +
          g.queryPanel('instance:node_net_saturation:sum_irate', '{{instance}}', legendLink) +
          g.stack +
          { yaxes: g.yaxes('Bps') },
        )
      )
      .addRow(
        g.row('Storage')
        .addPanel(
          g.panel('Disk Capacity') +
          // Raw filesystem series are scoped to the configured node-exporter
          // job, matching the recording rules used by the other panels.
          g.queryPanel('sum(max(node_filesystem_size{%(nodeExporterSelector)s, fstype=~"ext[24]"} - node_filesystem_free{%(nodeExporterSelector)s, fstype=~"ext[24]"}) by (device,instance,namespace)) by (instance,namespace) / scalar(sum(max(node_filesystem_size{%(nodeExporterSelector)s, fstype=~"ext[24]"}) by (device,instance,namespace)))' % $._config, '{{instance}}', legendLink) +
          g.stack +
          { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
        ),
      ),

    'k8s-node-rsrc-use.json':
      g.dashboard('K8s / USE Method / Node')
      .addTemplate('instance', 'up{%(nodeExporterSelector)s}' % $._config, 'instance')
      .addRow(
        g.row('CPU')
        .addPanel(
          g.panel('CPU Utilisation') +
          g.queryPanel('instance:node_cpu_utilisation:avg1m{instance="$instance"}', 'Utilisation') +
          { yaxes: g.yaxes('percentunit') },
        )
        .addPanel(
          g.panel('CPU Saturation (Load1)') +
          g.queryPanel('instance:node_cpu_saturation_load1:{instance="$instance"}', 'Saturation') +
          { yaxes: g.yaxes('percentunit') },
        )
      )
      .addRow(
        g.row('Memory')
        .addPanel(
          g.panel('Memory Utilisation') +
          g.queryPanel('instance:node_memory_utilisation:{instance="$instance"}', 'Memory') +
          { yaxes: g.yaxes('percentunit') },
        )
        .addPanel(
          g.panel('Memory Saturation (Swap I/O)') +
          g.queryPanel('instance:node_memory_swap_io_bytes:sum_rate{instance="$instance"}', 'Swap IO') +
          { yaxes: g.yaxes('Bps') },
        )
      )
      .addRow(
        g.row('Disk')
        .addPanel(
          g.panel('Disk IO Utilisation') +
          g.queryPanel('instance:node_disk_utilisation:avg_irate{instance="$instance"}', 'Utilisation') +
          { yaxes: g.yaxes('percentunit') },
        )
        .addPanel(
          g.panel('Disk IO Saturation') +
          g.queryPanel('instance:node_disk_saturation:avg_irate{instance="$instance"}', 'Saturation') +
          { yaxes: g.yaxes('percentunit') },
        )
      )
      .addRow(
        g.row('Net')
        .addPanel(
          g.panel('Net Utilisation (Transmitted)') +
          g.queryPanel('instance:node_net_utilisation:sum_irate{instance="$instance"}', 'Utilisation') +
          { yaxes: g.yaxes('Bps') },
        )
        .addPanel(
          g.panel('Net Saturation (Dropped)') +
          g.queryPanel('instance:node_net_saturation:sum_irate{instance="$instance"}', 'Saturation') +
          { yaxes: g.yaxes('Bps') },
        )
      )
      .addRow(
        g.row('Disk')
        .addPanel(
          g.panel('Disk Utilisation') +
          // Restricted to the selected instance like every other panel on this
          // dashboard; without the matcher the panel would show one value
          // computed over every scraped host.
          g.queryPanel('1 - sum(max by (device, node) (node_filesystem_free{%(nodeExporterSelector)s, fstype=~"ext[24]", instance="$instance"})) / sum(max by (device, node) (node_filesystem_size{%(nodeExporterSelector)s, fstype=~"ext[24]", instance="$instance"}))' % $._config, 'Disk') +
          { yaxes: g.yaxes('percentunit') },
        ),
      ),
  },
}

View file

@ -0,0 +1,24 @@
{
"dependencies": [
{
"name": "grafonnet",
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib",
"subdir": "grafonnet"
}
},
"version": "master"
},
{
"name": "grafana-builder",
"source": {
"git": {
"remote": "https://github.com/kausalco/public",
"subdir": "grafana-builder"
}
},
"version": "master"
}
]
}

View file

@ -0,0 +1,60 @@
local grafana = import 'grafonnet/grafana.libsonnet';
local singlestat = grafana.singlestat;
local prometheus = grafana.prometheus;

// Single-entry value map that displays `text` in place of a null reading.
local nullValueMap(text) = [
  {
    op: '=',
    text: text,
    value: 'null',
  },
];

// Percentage gauge built on grafonnet's singlestat panel.
{
  // Creates a 0-100 gauge showing the current value of `query`.
  //
  // title:      panel title.
  // query:      Prometheus expression; rendered with the 'percent' format.
  // datasource: Grafana datasource name or template variable. Defaults to
  //             'prometheus'; pass e.g. '$datasource' so the panel follows a
  //             dashboard-level datasource variable.
  //
  // By default higher values are better (red below 50, green above 80) and a
  // null reading is shown as 'N/A'. The returned panel also carries the
  // hidden `with*` modifiers defined below.
  new(title, query, datasource='prometheus')::
    singlestat.new(
      title,
      datasource=datasource,
      span=3,
      format='percent',
      valueName='current',
      colors=[
        'rgba(245, 54, 54, 0.9)',
        'rgba(237, 129, 40, 0.89)',
        'rgba(50, 172, 45, 0.97)',
      ],
      thresholds='50, 80',
      valueMaps=nullValueMap('N/A'),
    )
    .addTarget(
      prometheus.target(
        query
      )
    ) + {
      gauge: {
        maxValue: 100,
        minValue: 0,
        show: true,
        thresholdLabels: false,
        thresholdMarkers: true,
      },

      // Replaces the text displayed for a null reading.
      withTextNullValue(text):: self {
        valueMaps: nullValueMap(text),
      },

      // Overrides the panel width (`span`).
      withSpanSize(size):: self {
        span: size,
      },

      // Reverses the colour order and moves the thresholds to 80/90 for
      // metrics where a low value is the healthy state.
      withLowerBeingBetter():: self {
        colors: [
          'rgba(50, 172, 45, 0.97)',
          'rgba(237, 129, 40, 0.89)',
          'rgba(245, 54, 54, 0.9)',
        ],
        thresholds: '80, 90',
      },
    },
}

View file

@ -0,0 +1,48 @@
local grafana = import 'grafonnet/grafana.libsonnet';
local singlestat = grafana.singlestat;
local prometheus = grafana.prometheus;

// Single-entry value map that displays `text` in place of a null reading.
local nullValueMap(text) = [
  {
    op: '=',
    text: text,
    value: 'null',
  },
];

// Plain-number stat panel built on grafonnet's singlestat panel.
{
  // Creates a stat panel showing the current value of `query`.
  //
  // title:      panel title.
  // query:      Prometheus expression.
  // datasource: Grafana datasource name or template variable. Defaults to
  //             'prometheus'; pass e.g. '$datasource' so the panel follows a
  //             dashboard-level datasource variable.
  //
  // A null reading is shown as '0' by default. The returned panel also
  // carries the hidden `with*` modifiers defined below.
  new(title, query, datasource='prometheus')::
    singlestat.new(
      title,
      datasource=datasource,
      span=3,
      valueName='current',
      valueMaps=nullValueMap('0'),
    )
    .addTarget(
      prometheus.target(
        query
      )
    ) + {
      // Replaces the text displayed for a null reading.
      withTextNullValue(text):: self {
        valueMaps: nullValueMap(text),
      },

      // Overrides the panel width (`span`).
      withSpanSize(size):: self {
        span: size,
      },

      // Sets the `postfix` shown with the value.
      withPostfix(postfix):: self {
        postfix: postfix,
      },

      // Turns on the sparkline with fixed line and fill colours.
      withSparkline():: self {
        sparkline: {
          show: true,
          lineColor: 'rgb(31, 120, 193)',
          fillColor: 'rgba(31, 118, 189, 0.18)',
        },
      },
    },
}

View file

@ -0,0 +1,5 @@
// Index of the promgrafonnet panel helpers; each hidden field exposes one
// helper file under a short name.
{
  gauge:: import 'gauge.libsonnet',
  numbersinglestat:: import 'numbersinglestat.libsonnet',
  // NOTE(review): percentlinegraph.libsonnet is not visible next to the other
  // helper files - confirm it exists, otherwise accessing this field fails.
  percentlinegraph:: import 'percentlinegraph.libsonnet',
}

View file

@ -0,0 +1,4 @@
// Mixin entry point: merges configuration, alerts, dashboards and recording
// rules into one object.
local config = import 'config.libsonnet';
local alerts = import 'alerts/alerts.libsonnet';
local dashboards = import 'dashboards/dashboards.libsonnet';
local rules = import 'rules/rules.libsonnet';

config + alerts + dashboards + rules

View file

@ -0,0 +1,121 @@
// Recording rules that pre-aggregate node-exporter series per instance.
// Each rule that takes a selector renders it from
// $._config.nodeExporterSelector.
{
prometheusRules+:: {
groups+: [
{
name: 'node.rules',
rules: [
{
// Number of CPUs per instance: node_cpu is first collapsed to one series
// per (instance, cpu) pair, then the pairs are counted per instance.
record: 'instance:node_num_cpu:sum',
expr: |||
count by (instance) (
sum by (instance, cpu) (
node_cpu{%(nodeExporterSelector)s}
)
)
||| % $._config,
},
{
// CPU utilisation: 1 minus the average idle-mode rate across the
// instance's node_cpu series over one minute.
record: 'instance:node_cpu_utilisation:avg1m',
expr: |||
1 - avg by (instance) (
rate(node_cpu{%(nodeExporterSelector)s,mode="idle"}[1m])
)
||| % $._config,
},
{
// CPU saturation is the 1min load average divided by the number of CPUs.
// Can go over 100%. >100% is bad.
record: 'instance:node_cpu_saturation_load1:',
expr: |||
sum by (instance) (
node_load1{%(nodeExporterSelector)s}
)
/
instance:node_num_cpu:sum
||| % $._config,
},
{
// Available memory per instance: free + cached + buffers.
record: 'instance:node_memory_bytes_available:sum',
expr: |||
sum by (instance) (
(node_memory_MemFree{%(nodeExporterSelector)s} + node_memory_Cached{%(nodeExporterSelector)s} + node_memory_Buffers{%(nodeExporterSelector)s})
)
||| % $._config,
},
{
// Total memory per instance.
record: 'instance:node_memory_bytes_total:sum',
expr: |||
sum by (instance) (
node_memory_MemTotal{%(nodeExporterSelector)s}
)
||| % $._config,
},
{
// Memory in use per instance divided by the total memory of ALL
// instances (the scalar sum), i.e. each instance's share of the whole.
// The expression has no placeholders, hence no `% $._config`.
record: 'instance:node_memory_utilisation:ratio',
expr: |||
(instance:node_memory_bytes_total:sum - instance:node_memory_bytes_available:sum)
/
scalar(sum(instance:node_memory_bytes_total:sum))
|||,
},
{
// Memory utilisation per instance relative to that instance's own total.
record: 'instance:node_memory_utilisation:',
expr: |||
1 - (instance:node_memory_bytes_available:sum / instance:node_memory_bytes_total:sum)
||| % $._config,
},
{
// Combined page-in + page-out rate per instance, scaled by 1e3.
// NOTE(review): presumably the counters are in KiB and 1e3 approximates
// the conversion to bytes - confirm the unit.
record: 'instance:node_memory_swap_io_bytes:sum_rate',
expr: |||
1e3 * sum by (instance) (
(rate(node_vmstat_pgpgin{%(nodeExporterSelector)s}[1m])
+ rate(node_vmstat_pgpgout{%(nodeExporterSelector)s}[1m]))
)
||| % $._config,
},
{
// Disk utilisation: I/O time in ms divided by 1e3, averaged over the
// instance's devices matching (sd|xvd).+ only.
record: 'instance:node_disk_utilisation:avg_irate',
expr: |||
avg by (instance) (
irate(node_disk_io_time_ms{%(nodeExporterSelector)s,device=~"(sd|xvd).+"}[1m]) / 1e3
)
||| % $._config,
},
{
// Disk saturation: same shape as utilisation, but on the weighted I/O
// time counter.
record: 'instance:node_disk_saturation:avg_irate',
expr: |||
avg by (instance) (
irate(node_disk_io_time_weighted{%(nodeExporterSelector)s,device=~"(sd|xvd).+"}[1m]) / 1e3
)
||| % $._config,
},
{
// Network utilisation: received + transmitted byte rate.
// Only the interface named "eth0" is considered.
record: 'instance:node_net_utilisation:sum_irate',
expr: |||
sum by (instance) (
(irate(node_network_receive_bytes{%(nodeExporterSelector)s,device="eth0"}[1m]) +
irate(node_network_transmit_bytes{%(nodeExporterSelector)s,device="eth0"}[1m]))
)
||| % $._config,
},
{
// Network saturation: received + transmitted drop rate.
// Only the interface named "eth0" is considered.
record: 'instance:node_net_saturation:sum_irate',
expr: |||
sum by (instance) (
(irate(node_network_receive_drop{%(nodeExporterSelector)s,device="eth0"}[1m]) +
irate(node_network_transmit_drop{%(nodeExporterSelector)s,device="eth0"}[1m]))
)
||| % $._config,
},
],
},
],
},
}