| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  | { | 
					
						
							|  |  |  |   _config+:: { | 
					
						
							|  |  |  |     // Selectors are inserted between {} in Prometheus queries. | 
					
						
							| 
									
										
										
										
											2019-07-16 10:34:27 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-30 14:52:36 -07:00
										 |  |  |     // Select the metrics coming from the node exporter. Note that all | 
					
						
							|  |  |  |     // the selected metrics are shown stacked on top of each other in | 
					
						
							|  |  |  |     // the 'USE Method / Cluster' dashboard. Consider disabling that | 
					
						
							|  |  |  |     // dashboard if mixing up all those metrics in the same dashboard | 
					
						
							|  |  |  |     // doesn't make sense (e.g. because they are coming from different | 
					
						
							|  |  |  |     // clusters). | 
					
						
							| 
									
										
										
										
											2019-07-16 12:18:17 -07:00
										 |  |  |     nodeExporterSelector: 'job="node"', | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-07-17 14:54:31 -07:00
										 |  |  |     // Select the fstype for filesystem-related queries. If left | 
					
						
							|  |  |  |     // empty, all filesystems are selected. If you have unusual | 
					
						
							|  |  |  |     // filesystems you don't want to include in dashboards and | 
					
						
							|  |  |  |     // alerting, you can exclude them here, e.g. 'fstype!="tmpfs"'. | 
					
						
							| 
									
										
										
										
											2019-09-12 04:57:19 -07:00
										 |  |  |     fsSelector: 'fstype!=""', | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-10-20 04:06:31 -07:00
										 |  |  |     // Select the mountpoint for filesystem-related queries. If left | 
					
						
							|  |  |  |     // empty, all mountpoints are selected. For example if you have a | 
					
						
							|  |  |  |     // special purpose tmpfs instance that has a fixed size and will | 
					
						
							|  |  |  |     // always be 100% full, but you still want alerts and dashboards for | 
					
						
							|  |  |  |     // other tmpfs instances, you can exclude those by mountpoint prefix | 
					
						
							|  |  |  |     // like so: 'mountpoint!~"/var/lib/foo.*"'. | 
					
						
							|  |  |  |     fsMountpointSelector: 'mountpoint!=""', | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-07-17 14:54:31 -07:00
										 |  |  |     // Select the device for disk-related queries. If left empty, all | 
					
						
							|  |  |  |     // devices are selected. If you have unusual devices you don't | 
					
						
							|  |  |  |     // want to include in dashboards and alerting, you can exclude | 
					
						
							|  |  |  |     // them here, e.g. 'device!="tmpfs"'. | 
					
						
							| 
									
										
										
										
											2019-09-12 04:57:19 -07:00
										 |  |  |     diskDeviceSelector: 'device!=""', | 
					
						
							| 
									
										
										
										
											2019-07-16 10:34:27 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-14 13:24:24 -07:00
										 |  |  |     // Some of the alerts are meant to fire if a critical failure of a | 
					
						
							|  |  |  |     // node is imminent (e.g. the disk is about to run full). In a | 
					
						
							|  |  |  |     // true “cloud native” setup, failures of a single node should be | 
					
						
							|  |  |  |     // tolerated. Hence, even imminent failure of a single node is no | 
					
						
							|  |  |  |     // reason to create a paging alert. However, in practice there are | 
					
						
							|  |  |  |     // still many situations where operators like to get paged in time | 
					
						
							|  |  |  |     // before a node runs out of disk space. nodeCriticalSeverity can | 
					
						
							|  |  |  |     // be set to the desired severity for this kind of alert. This | 
					
						
							|  |  |  |     // can even be templated to depend on labels of the node, e.g. you | 
					
						
							|  |  |  |     // could make this critical for traditional database masters but | 
					
						
							|  |  |  |     // just a warning for K8s nodes. | 
					
						
							|  |  |  |     nodeCriticalSeverity: 'critical', | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-05 11:30:53 -07:00
										 |  |  |     // CPU utilization (%) on which to trigger the | 
					
						
							|  |  |  |     // 'NodeCPUHighUsage' alert. | 
					
						
							|  |  |  |     cpuHighUsageThreshold: 90, | 
					
						
							| 
									
										
										
										
											2023-04-05 09:56:00 -07:00
										 |  |  |     // Load average 1m (per core) on which to trigger the | 
					
						
							|  |  |  |     // 'NodeSystemSaturation' alert. | 
					
						
							|  |  |  |     systemSaturationPerCoreThreshold: 2, | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-03-02 07:24:51 -08:00
										 |  |  |     // Available disk space (%) thresholds on which to trigger the | 
					
						
							|  |  |  |     // 'NodeFilesystemSpaceFillingUp' alerts. These alerts fire if the disk | 
					
						
							|  |  |  |     // usage grows in a way that it is predicted to run out in 4h or 1d | 
					
						
							|  |  |  |     // and if the provided thresholds have been reached right now. | 
					
						
							|  |  |  |     // In some cases you'll want to adjust these, e.g. by default Kubernetes | 
					
						
							|  |  |  |     // runs the image garbage collection when the disk usage reaches 85% | 
					
						
							|  |  |  |     // of its available space. In that case, you'll want to reduce the | 
					
						
							|  |  |  |     // critical threshold below to something like 14 or 15, otherwise | 
					
						
							|  |  |  |     // the alert could fire under normal node usage. | 
					
						
							|  |  |  |     fsSpaceFillingUpWarningThreshold: 40, | 
					
						
							|  |  |  |     fsSpaceFillingUpCriticalThreshold: 20, | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-18 02:28:32 -07:00
										 |  |  |     // Available disk space (%) thresholds on which to trigger the | 
					
						
							|  |  |  |     // 'NodeFilesystemAlmostOutOfSpace' alerts. | 
					
						
							| 
									
										
										
										
											2022-05-10 05:50:20 -07:00
										 |  |  |     fsSpaceAvailableWarningThreshold: 5, | 
					
						
							|  |  |  |     fsSpaceAvailableCriticalThreshold: 3, | 
					
						
							| 
									
										
										
										
											2020-09-18 02:28:32 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-05 09:21:50 -07:00
										 |  |  |     // Memory utilization (%) level on which to trigger the | 
					
						
							|  |  |  |     // 'NodeMemoryHighUtilization' alert. | 
					
						
							|  |  |  |     memoryHighUtilizationThreshold: 90, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Threshold for the rate of memory major page faults to trigger | 
					
						
							|  |  |  |     // 'NodeMemoryMajorPagesFaults' alert. | 
					
						
							| 
									
										
										
										
											2023-04-05 09:56:00 -07:00
										 |  |  |     memoryMajorPagesFaultsThreshold: 500, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Disk IO queue level above which to trigger | 
					
						
							|  |  |  |     // 'NodeDiskIOSaturation' alert. | 
					
						
							|  |  |  |     diskIOSaturationThreshold: 10, | 
					
						
							| 
									
										
										
										
											2023-04-05 09:21:50 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-03 03:40:22 -07:00
										 |  |  |     rateInterval: '5m', | 
					
						
							| 
									
										
										
										
											2021-04-01 17:34:23 -07:00
										 |  |  |     // Opt-in for multi-cluster support. | 
					
						
							|  |  |  |     showMultiCluster: false, | 
					
						
							|  |  |  |     clusterLabel: 'cluster', | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     dashboardNamePrefix: 'Node Exporter / ', | 
					
						
							|  |  |  |     dashboardTags: ['node-exporter-mixin'], | 
					
						
							| 
									
										
										
										
											2018-05-08 03:10:29 -07:00
										 |  |  |   }, | 
					
						
							|  |  |  | } |