| 
									
										
										
										
											2020-10-20 02:34:43 -07:00
										 |  |  | local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  | local dashboard = grafana.dashboard; | 
					
						
							|  |  |  | local row = grafana.row; | 
					
						
							|  |  |  | local prometheus = grafana.prometheus; | 
					
						
							|  |  |  | local template = grafana.template; | 
					
						
							|  |  |  | local graphPanel = grafana.graphPanel; | 
					
						
							| 
									
										
										
										
											2020-10-20 02:34:43 -07:00
										 |  |  | local promgrafonnet = import 'github.com/kubernetes-monitoring/kubernetes-mixin/lib/promgrafonnet/promgrafonnet.libsonnet'; | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  | local gauge = promgrafonnet.gauge; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |   grafanaDashboards+:: { | 
					
						
							|  |  |  |     'nodes.json': | 
					
						
							|  |  |  |       local idleCPU = | 
					
						
							|  |  |  |         graphPanel.new( | 
					
						
							| 
									
										
										
										
											2019-08-15 07:36:10 -07:00
										 |  |  |           'CPU Usage', | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |           datasource='$datasource', | 
					
						
							|  |  |  |           span=6, | 
					
						
							| 
									
										
										
										
											2018-05-10 02:05:59 -07:00
										 |  |  |           format='percentunit', | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           max=1, | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |           min=0, | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           stack=true, | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |         ) | 
					
						
							|  |  |  |         .addTarget(prometheus.target( | 
					
						
							|  |  |  |           ||| | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |             ( | 
					
						
							| 
									
										
										
										
											2021-11-04 03:03:27 -07:00
										 |  |  |               (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance"}[$__rate_interval]))) | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |             / ignoring(cpu) group_left | 
					
						
							| 
									
										
										
										
											2021-11-04 03:03:27 -07:00
										 |  |  |               count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}) | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |             ) | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |           ||| % $._config, | 
					
						
							|  |  |  |           legendFormat='{{cpu}}', | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           intervalFactor=5, | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |         )); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       local systemLoad = | 
					
						
							|  |  |  |         graphPanel.new( | 
					
						
							| 
									
										
										
										
											2019-07-12 13:58:43 -07:00
										 |  |  |           'Load Average', | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |           datasource='$datasource', | 
					
						
							|  |  |  |           span=6, | 
					
						
							| 
									
										
										
										
											2019-07-12 13:58:43 -07:00
										 |  |  |           format='short', | 
					
						
							| 
									
										
										
										
											2019-08-15 07:32:54 -07:00
										 |  |  |           min=0, | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           fill=0, | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2019-07-12 13:58:43 -07:00
										 |  |  |         .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='1m load average')) | 
					
						
							|  |  |  |         .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='5m load average')) | 
					
						
							| 
									
										
										
										
											2019-08-15 07:43:57 -07:00
										 |  |  |         .addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='15m load average')) | 
					
						
							|  |  |  |         .addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", mode="idle"})' % $._config, legendFormat='logical cores')); | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |       local memoryGraph = | 
					
						
							|  |  |  |         graphPanel.new( | 
					
						
							|  |  |  |           'Memory Usage', | 
					
						
							|  |  |  |           datasource='$datasource', | 
					
						
							|  |  |  |           span=9, | 
					
						
							|  |  |  |           format='bytes', | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           stack=true, | 
					
						
							|  |  |  |           min=0, | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |         ) | 
					
						
							|  |  |  |         .addTarget(prometheus.target( | 
					
						
							|  |  |  |           ||| | 
					
						
							| 
									
										
										
										
											2019-07-10 11:07:20 -07:00
										 |  |  |             ( | 
					
						
							| 
									
										
										
										
											2019-07-12 13:58:43 -07:00
										 |  |  |               node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"} | 
					
						
							| 
									
										
										
										
											2019-07-10 11:07:20 -07:00
										 |  |  |             - | 
					
						
							| 
									
										
										
										
											2019-07-12 13:58:43 -07:00
										 |  |  |               node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"} | 
					
						
							| 
									
										
										
										
											2019-07-10 11:07:20 -07:00
										 |  |  |             - | 
					
						
							| 
									
										
										
										
											2019-07-12 13:58:43 -07:00
										 |  |  |               node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"} | 
					
						
							| 
									
										
										
										
											2019-07-10 11:07:20 -07:00
										 |  |  |             - | 
					
						
							| 
									
										
										
										
											2019-07-12 13:58:43 -07:00
										 |  |  |               node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"} | 
					
						
							| 
									
										
										
										
											2019-07-10 11:07:20 -07:00
										 |  |  |             ) | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |           ||| % $._config, legendFormat='memory used' | 
					
						
							|  |  |  |         )) | 
					
						
							| 
									
										
										
										
											2019-07-12 13:58:43 -07:00
										 |  |  |         .addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory buffers')) | 
					
						
							|  |  |  |         .addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory cached')) | 
					
						
							|  |  |  |         .addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='memory free')); | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-07-17 14:54:31 -07:00
										 |  |  |       // TODO: It would be nicer to have a gauge that gets a 0-1 range and displays it as a percentage 0%-100%. | 
					
						
							|  |  |  |       // This needs to be added upstream in the promgrafonnet library and then changed here. | 
					
						
							| 
									
										
										
										
											2020-09-09 17:57:04 -07:00
										 |  |  |       // NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout. | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |       local memoryGauge = gauge.new( | 
					
						
							|  |  |  |         'Memory Usage', | 
					
						
							|  |  |  |         ||| | 
					
						
							| 
									
										
										
										
											2019-07-17 14:54:31 -07:00
										 |  |  |           100 - | 
					
						
							| 
									
										
										
										
											2019-07-10 11:07:20 -07:00
										 |  |  |           ( | 
					
						
							| 
									
										
										
										
											2020-09-09 17:57:04 -07:00
										 |  |  |             avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"}) | 
					
						
							| 
									
										
										
										
											2018-05-10 02:21:00 -07:00
										 |  |  |           / | 
					
						
							| 
									
										
										
										
											2020-09-09 17:57:04 -07:00
										 |  |  |             avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}) | 
					
						
							| 
									
										
										
										
											2019-07-10 11:07:20 -07:00
										 |  |  |           * 100 | 
					
						
							| 
									
										
										
										
											2019-07-17 14:54:31 -07:00
										 |  |  |           ) | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |         ||| % $._config, | 
					
						
							|  |  |  |       ).withLowerBeingBetter(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       local diskIO = | 
					
						
							|  |  |  |         graphPanel.new( | 
					
						
							|  |  |  |           'Disk I/O', | 
					
						
							|  |  |  |           datasource='$datasource', | 
					
						
							| 
									
										
										
										
											2019-08-15 07:32:54 -07:00
										 |  |  |           span=6, | 
					
						
							|  |  |  |           min=0, | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           fill=0, | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2019-07-17 14:54:31 -07:00
										 |  |  |         // TODO: Does it make sense to have those three in the same panel? | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |         .addTarget(prometheus.target( | 
					
						
							| 
									
										
										
										
											2021-04-23 21:40:51 -07:00
										 |  |  |           'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % $._config, | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           legendFormat='{{device}} read', | 
					
						
							|  |  |  |         )) | 
					
						
							|  |  |  |         .addTarget(prometheus.target( | 
					
						
							| 
									
										
										
										
											2021-04-23 21:40:51 -07:00
										 |  |  |           'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % $._config, | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           legendFormat='{{device}} written', | 
					
						
							|  |  |  |         )) | 
					
						
							|  |  |  |         .addTarget(prometheus.target( | 
					
						
							| 
									
										
										
										
											2021-04-23 21:40:51 -07:00
										 |  |  |           'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % $._config, | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           legendFormat='{{device}} io time', | 
					
						
							|  |  |  |         )) + | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |         { | 
					
						
							|  |  |  |           seriesOverrides: [ | 
					
						
							|  |  |  |             { | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |               alias: '/ read| written/', | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |               yaxis: 1, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |             { | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |               alias: '/ io time/', | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |               yaxis: 2, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |           ], | 
					
						
							|  |  |  |           yaxes: [ | 
					
						
							|  |  |  |             self.yaxe(format='bytes'), | 
					
						
							| 
									
										
										
										
											2019-07-12 13:58:43 -07:00
										 |  |  |             self.yaxe(format='s'), | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |           ], | 
					
						
							|  |  |  |         }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-15 07:32:54 -07:00
										 |  |  |       // TODO: Somehow partition this by device while excluding read-only devices. | 
					
						
							|  |  |  |       local diskSpaceUsage = | 
					
						
							|  |  |  |         graphPanel.new( | 
					
						
							|  |  |  |           'Disk Space Usage', | 
					
						
							|  |  |  |           datasource='$datasource', | 
					
						
							|  |  |  |           span=6, | 
					
						
							|  |  |  |           format='bytes', | 
					
						
							|  |  |  |           min=0, | 
					
						
							|  |  |  |           fill=1, | 
					
						
							|  |  |  |           stack=true, | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         .addTarget(prometheus.target( | 
					
						
							|  |  |  |           ||| | 
					
						
							|  |  |  |             sum( | 
					
						
							|  |  |  |               max by (device) ( | 
					
						
							|  |  |  |                 node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s} | 
					
						
							|  |  |  |               - | 
					
						
							|  |  |  |                 node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s} | 
					
						
							|  |  |  |               ) | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |           ||| % $._config, | 
					
						
							|  |  |  |           legendFormat='used', | 
					
						
							|  |  |  |         )) | 
					
						
							|  |  |  |         .addTarget(prometheus.target( | 
					
						
							|  |  |  |           ||| | 
					
						
							|  |  |  |             sum( | 
					
						
							|  |  |  |               max by (device) ( | 
					
						
							|  |  |  |                 node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s} | 
					
						
							|  |  |  |               ) | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |           ||| % $._config, | 
					
						
							|  |  |  |           legendFormat='available', | 
					
						
							|  |  |  |         )) + | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |           seriesOverrides: [ | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |               alias: 'used', | 
					
						
							|  |  |  |               color: '#E0B400', | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |               alias: 'available', | 
					
						
							|  |  |  |               color: '#73BF69', | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |           ], | 
					
						
							|  |  |  |         }; | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |       local networkReceived = | 
					
						
							|  |  |  |         graphPanel.new( | 
					
						
							|  |  |  |           'Network Received', | 
					
						
							|  |  |  |           datasource='$datasource', | 
					
						
							|  |  |  |           span=6, | 
					
						
							|  |  |  |           format='bytes', | 
					
						
							| 
									
										
										
										
											2019-08-15 07:32:54 -07:00
										 |  |  |           min=0, | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           fill=0, | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |         .addTarget(prometheus.target( | 
					
						
							| 
									
										
										
										
											2021-04-23 21:40:51 -07:00
										 |  |  |           'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval])' % $._config, | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           legendFormat='{{device}}', | 
					
						
							|  |  |  |         )); | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |       local networkTransmitted = | 
					
						
							|  |  |  |         graphPanel.new( | 
					
						
							|  |  |  |           'Network Transmitted', | 
					
						
							|  |  |  |           datasource='$datasource', | 
					
						
							|  |  |  |           span=6, | 
					
						
							|  |  |  |           format='bytes', | 
					
						
							| 
									
										
										
										
											2019-08-15 07:32:54 -07:00
										 |  |  |           min=0, | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           fill=0, | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |         .addTarget(prometheus.target( | 
					
						
							| 
									
										
										
										
											2021-04-23 21:40:51 -07:00
										 |  |  |           'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval])' % $._config, | 
					
						
							| 
									
										
										
										
											2019-08-14 15:40:51 -07:00
										 |  |  |           legendFormat='{{device}}', | 
					
						
							|  |  |  |         )); | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-01 17:34:23 -07:00
										 |  |  |       dashboard.new( | 
					
						
							|  |  |  |         '%sNodes' % $._config.dashboardNamePrefix, | 
					
						
							|  |  |  |         time_from='now-1h', | 
					
						
							|  |  |  |         tags=($._config.dashboardTags), | 
					
						
							|  |  |  |         timezone='utc', | 
					
						
							|  |  |  |         refresh='30s', | 
					
						
							|  |  |  |         graphTooltip='shared_crosshair' | 
					
						
							|  |  |  |       ) | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |       .addTemplate( | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |           current: { | 
					
						
							|  |  |  |             text: 'Prometheus', | 
					
						
							|  |  |  |             value: 'Prometheus', | 
					
						
							|  |  |  |           }, | 
					
						
							|  |  |  |           hide: 0, | 
					
						
							| 
									
										
										
										
											2021-10-20 09:10:14 -07:00
										 |  |  |           label: 'Data Source', | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |           name: 'datasource', | 
					
						
							|  |  |  |           options: [], | 
					
						
							|  |  |  |           query: 'prometheus', | 
					
						
							|  |  |  |           refresh: 1, | 
					
						
							|  |  |  |           regex: '', | 
					
						
							|  |  |  |           type: 'datasource', | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |       ) | 
					
						
							|  |  |  |       .addTemplate( | 
					
						
							|  |  |  |         template.new( | 
					
						
							|  |  |  |           'instance', | 
					
						
							|  |  |  |           '$datasource', | 
					
						
							| 
									
										
										
										
											2019-07-17 14:54:31 -07:00
										 |  |  |           'label_values(node_exporter_build_info{%(nodeExporterSelector)s}, instance)' % $._config, | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |           refresh='time', | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |       ) | 
					
						
							|  |  |  |       .addRow( | 
					
						
							|  |  |  |         row.new() | 
					
						
							|  |  |  |         .addPanel(idleCPU) | 
					
						
							|  |  |  |         .addPanel(systemLoad) | 
					
						
							|  |  |  |       ) | 
					
						
							|  |  |  |       .addRow( | 
					
						
							|  |  |  |         row.new() | 
					
						
							|  |  |  |         .addPanel(memoryGraph) | 
					
						
							|  |  |  |         .addPanel(memoryGauge) | 
					
						
							|  |  |  |       ) | 
					
						
							|  |  |  |       .addRow( | 
					
						
							|  |  |  |         row.new() | 
					
						
							|  |  |  |         .addPanel(diskIO) | 
					
						
							|  |  |  |         .addPanel(diskSpaceUsage) | 
					
						
							|  |  |  |       ) | 
					
						
							|  |  |  |       .addRow( | 
					
						
							|  |  |  |         row.new() | 
					
						
							|  |  |  |         .addPanel(networkReceived) | 
					
						
							|  |  |  |         .addPanel(networkTransmitted) | 
					
						
							|  |  |  |       ), | 
					
						
							|  |  |  |   }, | 
					
						
							|  |  |  | } |