2018-05-08 03:10:29 -07:00
|
|
|
local grafana = import 'grafonnet/grafana.libsonnet';
|
|
|
|
local dashboard = grafana.dashboard;
|
|
|
|
local row = grafana.row;
|
|
|
|
local prometheus = grafana.prometheus;
|
|
|
|
local template = grafana.template;
|
|
|
|
local graphPanel = grafana.graphPanel;
|
2019-07-06 12:09:17 -07:00
|
|
|
local promgrafonnet = import 'promgrafonnet/promgrafonnet.libsonnet';
|
2018-05-08 03:10:29 -07:00
|
|
|
local gauge = promgrafonnet.gauge;
|
|
|
|
|
|
|
|
{
|
|
|
|
grafanaDashboards+:: {
|
|
|
|
'nodes.json':
|
|
|
|
// Per-core CPU usage panel for the selected instance.
// The query is `1 - idle`, i.e. the busy fraction of each core in the range
// 0..1, so the panel is titled as usage rather than idle time. Because the
// unit is 'percentunit' (a value of 1.0 is rendered as 100%), the y-axis is
// capped at 1; a cap of 100 would stretch the axis to 10000%.
local idleCPU =
  graphPanel.new(
    'CPU Usage',
    datasource='$datasource',
    span=6,
    format='percentunit',
    max=1,
    min=0,
  )
  .addTarget(prometheus.target(
    |||
      1 - avg by (cpu) (irate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}[1m]))
    ||| % $._config,
    legendFormat='{{cpu}}',
    intervalFactor=10,
  ));
|
|
|
|
|
2019-07-12 13:58:43 -07:00
|
|
|
// TODO: Is this panel useful?
// Load-average panel: one series each for the node_load1, node_load5 and
// node_load15 metrics of the selected instance.
local systemLoad =
  local loadPanel =
    graphPanel.new(
      'Load Average',
      datasource='$datasource',
      span=6,
      format='short',
    );
  // Targets are listed in the order they are attached to the panel.
  local loadQueries = [
    prometheus.target(
      std.format('node_load1{%(nodeExporterSelector)s, instance="$instance"}', $._config),
      legendFormat='1m load average',
    ),
    prometheus.target(
      std.format('node_load5{%(nodeExporterSelector)s, instance="$instance"}', $._config),
      legendFormat='5m load average',
    ),
    prometheus.target(
      std.format('node_load15{%(nodeExporterSelector)s, instance="$instance"}', $._config),
      legendFormat='15m load average',
    ),
  ];
  std.foldl(function(panel, query) panel.addTarget(query), loadQueries, loadPanel);
|
2018-05-08 03:10:29 -07:00
|
|
|
|
|
|
|
// Memory breakdown panel for the selected instance, in bytes. Plots four
// series: used (total minus free, buffers and cached), buffers, cached and
// free.
local memoryGraph =
  local usedQuery =
    |||
      (
        node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}
      -
        node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}
      -
        node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}
      -
        node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}
      )
    ||| % $._config;
  // Targets are listed in the order they are attached to the panel.
  local memoryQueries = [
    prometheus.target(usedQuery, legendFormat='memory used'),
    prometheus.target(
      std.format('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}', $._config),
      legendFormat='memory buffers',
    ),
    prometheus.target(
      std.format('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}', $._config),
      legendFormat='memory cached',
    ),
    prometheus.target(
      std.format('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}', $._config),
      legendFormat='memory free',
    ),
  ];
  local memoryPanel =
    graphPanel.new(
      'Memory Usage',
      datasource='$datasource',
      span=9,
      format='bytes',
    );
  std.foldl(function(panel, query) panel.addTarget(query), memoryQueries, memoryPanel);
|
2018-05-08 03:10:29 -07:00
|
|
|
|
|
|
|
// Gauge showing the percentage of memory in use on the selected instance.
// MemAvailable / MemTotal * 100 is the share of memory that is still
// available, so it is subtracted from 100 to obtain usage; this matches the
// panel title and the "lower is better" setting applied below.
local memoryGauge = gauge.new(
  'Memory Usage',
  |||
    100 -
    (
      node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"}
    /
      node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}
    * 100
    )
  ||| % $._config,
).withLowerBeingBetter();
|
|
|
|
|
|
|
|
// Disk I/O panel for the selected instance: bytes read, bytes written and
// I/O time, each summed per instance. The 'read' series is pinned to the
// first y-axis (bytes) and the 'io time' series to the second (seconds).
local diskIO =
  // Targets are listed in the order they are attached to the panel.
  local ioQueries = [
    prometheus.target(
      std.format('sum by (instance) (irate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance"}[1m]))', $._config),
      legendFormat='read',
    ),
    prometheus.target(
      std.format('sum by (instance) (irate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance"}[1m]))', $._config),
      legendFormat='written',
    ),
    prometheus.target(
      std.format('sum by (instance) (irate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance"}[1m]))', $._config),
      legendFormat='io time',
    ),
  ];
  local ioPanel =
    graphPanel.new(
      'Disk I/O',
      datasource='$datasource',
      span=9,
    );
  std.foldl(function(panel, query) panel.addTarget(query), ioQueries, ioPanel) + {
    // Axis assignment per series alias.
    seriesOverrides: [
      { alias: 'read', yaxis: 1 },
      { alias: 'io time', yaxis: 2 },
    ],
    // Two y-axes built with the panel's own yaxe helper: bytes, then seconds.
    yaxes: [
      self.yaxe(format='bytes'),
      self.yaxe(format='s'),
    ],
  };
|
|
|
|
|
2019-07-12 13:58:43 -07:00
|
|
|
// TODO: Should this be partitioned by mountpoint?
// Gauge showing the percentage of filesystem space in use on the selected
// instance: 100 minus the ratio of summed available bytes to summed total
// bytes across the filesystems matched by fsSelector. Each sum() is closed
// right after its selector so the expression is valid PromQL.
local diskSpaceUsage = gauge.new(
  'Disk Space Usage',
  |||
    100 -
    (
      sum(node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, instance="$instance"})
    /
      sum(node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, instance="$instance"})
    * 100
    )
  ||| % $._config,
).withLowerBeingBetter();
|
|
|
|
|
|
|
|
// Network receive panel for the selected instance, in bytes: one series per
// device, excluding the "lo" device.
local networkReceived =
  local rxQuery = std.format(
    'irate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[1m])',
    $._config
  );
  local rxPanel =
    graphPanel.new(
      'Network Received',
      datasource='$datasource',
      span=6,
      format='bytes',
    );
  rxPanel.addTarget(prometheus.target(rxQuery, legendFormat='{{device}}'));
|
2018-05-08 03:10:29 -07:00
|
|
|
|
|
|
|
// Network transmit panel for the selected instance, in bytes: one series per
// device, excluding the "lo" device.
local networkTransmitted =
  local txQuery = std.format(
    'irate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[1m])',
    $._config
  );
  local txPanel =
    graphPanel.new(
      'Network Transmitted',
      datasource='$datasource',
      span=6,
      format='bytes',
    );
  txPanel.addTarget(prometheus.target(txQuery, legendFormat='{{device}}'));
|
2018-05-08 03:10:29 -07:00
|
|
|
|
|
|
|
// Assemble the "Nodes" dashboard: two template variables (datasource and
// instance) followed by four rows of two panels each.

// Datasource picker, written as a raw template object.
local datasourceTemplate = {
  current: {
    text: 'Prometheus',
    value: 'Prometheus',
  },
  hide: 0,
  label: null,
  name: 'datasource',
  options: [],
  query: 'prometheus',
  refresh: 1,
  regex: '',
  type: 'datasource',
};

// Instance picker, populated from the instance label of node_boot_time_seconds.
local instanceTemplate =
  template.new(
    'instance',
    '$datasource',
    std.format('label_values(node_boot_time_seconds{%(nodeExporterSelector)s}, instance)', $._config),
    refresh='time',
  );

// Builds a row holding two panels, added in the order given.
local panelRow(first, second) =
  row.new()
  .addPanel(first)
  .addPanel(second);

dashboard.new('Nodes', time_from='now-1h')
.addTemplate(datasourceTemplate)
.addTemplate(instanceTemplate)
.addRow(panelRow(idleCPU, systemLoad))
.addRow(panelRow(memoryGraph, memoryGauge))
.addRow(panelRow(diskIO, diskSpaceUsage))
.addRow(panelRow(networkReceived, networkTransmitted)),
|
|
|
|
},
|
|
|
|
}
|