2018-05-08 03:10:29 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  _config+:: {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // Selectors are inserted between {} in Prometheus queries.
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-16 10:34:27 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2019-10-30 14:52:36 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // Select the metrics coming from the node exporter. Note that all
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // the selected metrics are shown stacked on top of each other in
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // the 'USE Method / Cluster' dashboard. Consider disabling that
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // dashboard if mixing up all those metrics in the same dashboard
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // doesn't make sense (e.g. because they are coming from different
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // clusters).
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-16 12:18:17 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    nodeExporterSelector: 'job="node"',
							 | 
						
					
						
							
								
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-17 14:54:31 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // Select the fstype for filesystem-related queries. If left
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // empty, all filesystems are selected. If you have unusual
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // filesystem you don't want to include in dashboards and
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // alerting, you can exclude them here, e.g. 'fstype!="tmpfs"'.
							 | 
						
					
						
							
								
									
										
										
										
											2019-09-12 04:57:19 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    fsSelector: 'fstype!=""',
							 | 
						
					
						
							
								
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-10-20 04:06:31 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // Select the mountpoint for filesystem-related queries. If left
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // empty, all mountpoints are selected. For example if you have a
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // special purpose tmpfs instance that has a fixed size and will
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // always be 100% full, but you still want alerts and dashboards for
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // other tmpfs instances, you can exclude those by mountpoint prefix
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // like so: 'mountpoint!~"/var/lib/foo.*"'.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    fsMountpointSelector: 'mountpoint!=""',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-17 14:54:31 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // Select the device for disk-related queries. If left empty, all
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // devices are selected. If you have unusual devices you don't
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // want to include in dashboards and alerting, you can exclude
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // them here, e.g. 'device!="tmpfs"'.
							 | 
						
					
						
							
								
									
										
										
										
											2019-09-12 04:57:19 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    diskDeviceSelector: 'device!=""',
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-16 10:34:27 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2019-08-14 13:24:24 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // Some of the alerts are meant to fire if a critical failure of a
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // node is imminent (e.g. the disk is about to run full). In a
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // true “cloud native” setup, failures of a single node should be
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // tolerated. Hence, even imminent failure of a single node is no
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // reason to create a paging alert. However, in practice there are
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // still many situations where operators like to get paged in time
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // before a node runs out of disk space. nodeCriticalSeverity can
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // be set to the desired severity for this kind of alerts. This
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // can even be templated to depend on labels of the node, e.g. you
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // could make this critical for traditional database masters but
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // just a warning for K8s nodes.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    nodeCriticalSeverity: 'critical',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-05 11:30:53 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // CPU utilization (%) on which to trigger the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // 'NodeCPUHighUsage' alert.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    cpuHighUsageThreshold: 90,
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-05 09:56:00 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // Load average 1m (per core) on which to trigger the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // 'NodeSystemSaturation' alert.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    systemSaturationPerCoreThreshold: 2,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-03-02 07:24:51 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // Available disk space (%) thresholds on which to trigger the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // 'NodeFilesystemSpaceFillingUp' alerts. These alerts fire if the disk
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // usage grows in a way that it is predicted to run out in 4h or 1d
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // and if the provided thresholds have been reached right now.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // In some cases you'll want to adjust these, e.g. by default Kubernetes
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // runs the image garbage collection when the disk usage reaches 85%
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // of its available space. In that case, you'll want to reduce the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // critical threshold below to something like 14 or 15, otherwise
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // the alert could fire under normal node usage.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    fsSpaceFillingUpWarningThreshold: 40,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    fsSpaceFillingUpCriticalThreshold: 20,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-09-18 02:28:32 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // Available disk space (%) thresholds on which to trigger the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // 'NodeFilesystemAlmostOutOfSpace' alerts.
							 | 
						
					
						
							
								
									
										
										
										
											2022-05-10 05:50:20 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    fsSpaceAvailableWarningThreshold: 5,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    fsSpaceAvailableCriticalThreshold: 3,
							 | 
						
					
						
							
								
									
										
										
										
											2020-09-18 02:28:32 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-05 09:21:50 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // Memory utilzation (%) level on which to trigger the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // 'NodeMemoryHighUtilization' alert.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    memoryHighUtilizationThreshold: 90,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // Threshold for the rate of memory major page faults to trigger
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // 'NodeMemoryMajorPagesFaults' alert.
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-05 09:56:00 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    memoryMajorPagesFaultsThreshold: 500,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // Disk IO queue level above which to trigger
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    // 'NodeDiskIOSaturation' alert.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    diskIOSaturationThreshold: 10,
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-05 09:21:50 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-03 03:40:22 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    rateInterval: '5m',
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-01 17:34:23 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // Opt-in for multi-cluster support.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    showMultiCluster: false,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    clusterLabel: 'cluster',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    dashboardNamePrefix: 'Node Exporter / ',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    dashboardTags: ['node-exporter-mixin'],
							 | 
						
					
						
							
								
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  },
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 |