node_exporter/node-mixin/dashboards/node.libsonnet
Tom Wilkie bafe1707f1
Beginnings of a node-exporter monitoring mixin.
Signed-off-by: Tom Wilkie <tom.wilkie@gmail.com>
2018-11-19 16:46:10 +01:00

177 lines
6.2 KiB
Plaintext

local grafana = import 'grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local promgrafonnet = import '../lib/promgrafonnet/promgrafonnet.libsonnet';
local gauge = promgrafonnet.gauge;
{
grafanaDashboards+:: {
'nodes.json':
// Per-CPU utilisation graph for the selected instance.
//
// The query takes the average idle-mode irate per `cpu` label over 5m, scales
// it to a percentage and subtracts it from 100, so the plotted value is the
// *busy* share of each CPU. The panel title therefore reads 'CPU Usage'; the
// binding keeps its historical name `idleCPU` because the dashboard layout
// further down refers to it by that name.
//
// NOTE(review): `node_cpu` looks like the pre-0.16 node_exporter metric name;
// confirm it matches the exporter version being scraped.
local idleCPU =
  local busyPercentQuery =
    |||
      100 - (avg by (cpu) (irate(node_cpu{%(nodeExporterSelector)s, mode="idle", instance="$instance"}[5m])) * 100)
    ||| % $._config;
  graphPanel.new(
    'CPU Usage',
    datasource='$datasource',
    span=6,
    format='percent',
    // The expression is bounded to 0..100 by construction; pin the axis to it.
    max=100,
    min=0,
  )
  .addTarget(prometheus.target(
    busyPercentQuery,
    legendFormat='{{cpu}}',
    intervalFactor=10,
  ));
// 'System load' graph: the 1, 5 and 15 minute load metrics for the selected
// instance, each multiplied by 100 and rendered with the 'percent' unit.
local systemLoad =
  local cfg = $._config;
  local emptyPanel = graphPanel.new(
    'System load',
    datasource='$datasource',
    span=6,
    format='percent',
  );
  // Targets are attached in list order (1m, 5m, 15m).
  local loadTargets = [
    prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"} * 100' % cfg, legendFormat='load 1m'),
    prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"} * 100' % cfg, legendFormat='load 5m'),
    prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance"} * 100' % cfg, legendFormat='load 15m'),
  ];
  std.foldl(function(panel, target) panel.addTarget(target), loadTargets, emptyPanel);
// 'Memory Usage' graph (bytes) for the selected instance with four series:
// used (total minus free, buffers and cached), buffers, cached and free.
local memoryGraph =
  local cfg = $._config;
  // Used memory is derived: MemTotal - MemFree - Buffers - Cached.
  local usedQuery =
    |||
      node_memory_MemTotal{%(nodeExporterSelector)s, instance="$instance"}
      - node_memory_MemFree{%(nodeExporterSelector)s, instance="$instance"}
      - node_memory_Buffers{%(nodeExporterSelector)s, instance="$instance"}
      - node_memory_Cached{%(nodeExporterSelector)s, instance="$instance"}
    ||| % cfg;
  local emptyPanel = graphPanel.new(
    'Memory Usage',
    datasource='$datasource',
    span=9,
    format='bytes',
  );
  // Targets are attached in list order.
  local memoryTargets = [
    prometheus.target(usedQuery, legendFormat='memory used'),
    prometheus.target('node_memory_Buffers{%(nodeExporterSelector)s, instance="$instance"}' % cfg, legendFormat='memory buffers'),
    prometheus.target('node_memory_Cached{%(nodeExporterSelector)s, instance="$instance"}' % cfg, legendFormat='memory cached'),
    prometheus.target('node_memory_MemFree{%(nodeExporterSelector)s, instance="$instance"}' % cfg, legendFormat='memory free'),
  ];
  std.foldl(function(panel, target) panel.addTarget(target), memoryTargets, emptyPanel);
// 'Memory Usage' gauge: used memory (total minus free, buffers and cached)
// as a percentage of total memory for the selected instance.
local memoryGauge =
  local usedPercentQuery =
    |||
      (
      node_memory_MemTotal{%(nodeExporterSelector)s, instance="$instance"}
      - node_memory_MemFree{%(nodeExporterSelector)s, instance="$instance"}
      - node_memory_Buffers{%(nodeExporterSelector)s, instance="$instance"}
      - node_memory_Cached{%(nodeExporterSelector)s, instance="$instance"}
      ) * 100
      /
      node_memory_MemTotal{%(nodeExporterSelector)s, instance="$instance"}
    ||| % $._config;
  local baseGauge = gauge.new('Memory Usage', usedPercentQuery);
  baseGauge.withLowerBeingBetter();
// 'Disk I/O' graph for the selected instance: summed 2m rates of bytes read,
// bytes written and I/O time. The panel is then extended with a second
// y-axis so 'io time' (ms) is not drawn on the bytes axis.
local diskIO =
  local cfg = $._config;
  local emptyPanel = graphPanel.new(
    'Disk I/O',
    datasource='$datasource',
    span=9,
  );
  // Targets are attached in list order.
  local ioTargets = [
    prometheus.target('sum by (instance) (rate(node_disk_bytes_read{%(nodeExporterSelector)s, instance="$instance"}[2m]))' % cfg, legendFormat='read'),
    prometheus.target('sum by (instance) (rate(node_disk_bytes_written{%(nodeExporterSelector)s, instance="$instance"}[2m]))' % cfg, legendFormat='written'),
    prometheus.target('sum by (instance) (rate(node_disk_io_time_ms{%(nodeExporterSelector)s, instance="$instance"}[2m]))' % cfg, legendFormat='io time'),
  ];
  local panelWithTargets = std.foldl(function(panel, target) panel.addTarget(target), ioTargets, emptyPanel);
  panelWithTargets + {
    // Only 'read' and 'io time' get an explicit axis override.
    seriesOverrides: [
      { alias: 'read', yaxis: 1 },
      { alias: 'io time', yaxis: 2 },
    ],
    // `self.yaxe` is resolved on the panel object this literal extends.
    yaxes: [
      self.yaxe(format='bytes'),
      self.yaxe(format='ms'),
    ],
  };
// 'Disk Space Usage' gauge: (size - free) as a percentage of size, summed
// over every filesystem of the selected instance whose device is not "rootfs".
local diskSpaceUsage =
  local usedPercentQuery =
    |||
      (
      sum(node_filesystem_size{%(nodeExporterSelector)s, device!="rootfs", instance="$instance"})
      - sum(node_filesystem_free{%(nodeExporterSelector)s, device!="rootfs", instance="$instance"})
      ) * 100
      /
      sum(node_filesystem_size{%(nodeExporterSelector)s, device!="rootfs", instance="$instance"})
    ||| % $._config;
  local baseGauge = gauge.new('Disk Space Usage', usedPercentQuery);
  baseGauge.withLowerBeingBetter();
// 'Network Received' graph: 5m rate of received bytes per device for the
// selected instance, excluding devices matching "lo".
local networkReceived =
  local receiveTarget = prometheus.target(
    'rate(node_network_receive_bytes{%(nodeExporterSelector)s, instance="$instance", device!~"lo"}[5m])' % $._config,
    legendFormat='{{device}}',
  );
  local emptyPanel = graphPanel.new(
    'Network Received',
    datasource='$datasource',
    span=6,
    format='bytes',
  );
  emptyPanel.addTarget(receiveTarget);
// 'Network Transmitted' graph: 5m rate of transmitted bytes per device for
// the selected instance, excluding devices matching "lo".
local networkTransmitted =
  local transmitTarget = prometheus.target(
    'rate(node_network_transmit_bytes{%(nodeExporterSelector)s, instance="$instance", device!~"lo"}[5m])' % $._config,
    legendFormat='{{device}}',
  );
  local emptyPanel = graphPanel.new(
    'Network Transmitted',
    datasource='$datasource',
    span=6,
    format='bytes',
  );
  emptyPanel.addTarget(transmitTarget);
// Assemble the 'Nodes' dashboard: two template variables ($datasource and
// $instance) followed by four rows of two panels each.

// Hand-written datasource variable; defaults to the datasource named 'Prometheus'.
local datasourceTemplate = {
  current: {
    text: 'Prometheus',
    value: 'Prometheus',
  },
  hide: 0,
  label: null,
  name: 'datasource',
  options: [],
  query: 'prometheus',
  refresh: 1,
  regex: '',
  type: 'datasource',
};
// $instance is populated from the `instance` label values of node_boot_time.
local instanceTemplate = template.new(
  'instance',
  '$datasource',
  'label_values(node_boot_time{%(nodeExporterSelector)s}, instance)' % $._config,
  refresh='time',
);
// Builds a row holding `first` then `second`, in that order.
local pairRow(first, second) = row.new().addPanel(first).addPanel(second);
dashboard.new('Nodes', time_from='now-1h')
.addTemplate(datasourceTemplate)
.addTemplate(instanceTemplate)
.addRow(pairRow(idleCPU, systemLoad))
.addRow(pairRow(memoryGraph, memoryGauge))
.addRow(pairRow(diskIO, diskSpaceUsage))
.addRow(pairRow(networkReceived, networkTransmitted)),
},
}