// Recording rules derived from node-exporter metrics.
//
// Every selector and rate window is taken from `$._config`
// (nodeExporterSelector, diskDeviceSelector, rateInterval), which must be
// supplied by whatever object this file is mixed into.
{
  // Shorthand for the mixin configuration; `$` is late-bound, so this still
  // sees overrides applied by the object this file is merged into.
  local cfg = $._config,

  // Builds a recording rule that sums the per-second rate of a per-device
  // network counter over all devices except the loopback device "lo".
  //   recordPrefix: record name up to (not including) the ':rate<interval>' suffix.
  //   counter:      name of the node-exporter counter to rate.
  local networkRateExcludingLo(recordPrefix, counter) = {
    record: '%s:rate%s' % [recordPrefix, cfg.rateInterval],
    expr: |||
      sum without (device) (
        rate(%s{%s, device!="lo"}[%s])
      )
    ||| % [counter, cfg.nodeExporterSelector, cfg.rateInterval],
  },

  prometheusRules+:: {
    groups+: [
      {
        name: 'node-exporter.rules',
        rules: [
          // Number of CPUs per node: counts the "idle" series, one per CPU.
          {
            record: 'instance:node_num_cpu:sum',
            expr: |||
              count without (cpu, mode) (
                node_cpu_seconds_total{%(nodeExporterSelector)s,mode="idle"}
              )
            ||| % cfg,
          },

          // CPU utilisation: 1 minus the fraction of time spent in the
          // idle, iowait and steal modes, averaged across all CPUs.
          {
            record: 'instance:node_cpu_utilisation:rate%(rateInterval)s' % cfg,
            expr: |||
              1 - avg without (cpu) (
                sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal"}[%(rateInterval)s]))
              )
            ||| % cfg,
          },

          // CPU saturation: 1min average run queue length divided by the
          // number of CPUs. Can go over 1.
          // TODO: There are situations where a run queue >1/core is just normal and fine.
          //       We need to clarify how to read this metric and if its usage is helpful at all.
          {
            record: 'instance:node_load1_per_cpu:ratio',
            expr: |||
              (
                node_load1{%(nodeExporterSelector)s}
              /
                instance:node_num_cpu:sum{%(nodeExporterSelector)s}
              )
            ||| % cfg,
          },

          // Memory utilisation (ratio of used memory per instance).
          // Uses MemAvailable where that series exists; otherwise the `or`
          // falls back to Buffers + Cached + MemFree + Slab.
          {
            record: 'instance:node_memory_utilisation:ratio',
            expr: |||
              1 - (
                (
                  node_memory_MemAvailable_bytes{%(nodeExporterSelector)s}
                  or
                  (
                    node_memory_Buffers_bytes{%(nodeExporterSelector)s}
                    +
                    node_memory_Cached_bytes{%(nodeExporterSelector)s}
                    +
                    node_memory_MemFree_bytes{%(nodeExporterSelector)s}
                    +
                    node_memory_Slab_bytes{%(nodeExporterSelector)s}
                  )
                )
              /
                node_memory_MemTotal_bytes{%(nodeExporterSelector)s}
              )
            ||| % cfg,
          },

          // Per-second rate of major page faults over rateInterval.
          {
            record: 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s' % cfg,
            expr: |||
              rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[%(rateInterval)s])
            ||| % cfg,
          },

          // Disk utilisation: per-second rate of the I/O time counter over
          // rateInterval, per selected device.
          {
            record: 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s' % cfg,
            expr: |||
              rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[%(rateInterval)s])
            ||| % cfg,
          },

          // Disk saturation: per-second rate of the weighted I/O time counter
          // over rateInterval, per selected device.
          {
            record: 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s' % cfg,
            expr: |||
              rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[%(rateInterval)s])
            ||| % cfg,
          },

          // Network throughput per instance, loopback excluded.
          networkRateExcludingLo(
            'instance:node_network_receive_bytes_excluding_lo',
            'node_network_receive_bytes_total'
          ),
          networkRateExcludingLo(
            'instance:node_network_transmit_bytes_excluding_lo',
            'node_network_transmit_bytes_total'
          ),

          // Network packet drops per instance, loopback excluded.
          // TODO: Find out if those drops ever happen on modern switched networks.
          networkRateExcludingLo(
            'instance:node_network_receive_drop_excluding_lo',
            'node_network_receive_drop_total'
          ),
          networkRateExcludingLo(
            'instance:node_network_transmit_drop_excluding_lo',
            'node_network_transmit_drop_total'
          ),
        ],
      },
    ],
  },
}