mirror of
				https://github.com/prometheus/node_exporter.git
				synced 2025-08-20 18:33:52 -07:00 
			
		
		
		
	
		
			
				
	
	
		
			97 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			97 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| {
 | |
|   _config+:: {
 | |
|     // Selectors are inserted between {} in Prometheus queries.
 | |
| 
 | |
|     // Select the metrics coming from the node exporter. Note that all
 | |
|     // the selected metrics are shown stacked on top of each other in
 | |
|     // the 'USE Method / Cluster' dashboard. Consider disabling that
 | |
|     // dashboard if mixing up all those metrics in the same dashboard
 | |
|     // doesn't make sense (e.g. because they are coming from different
 | |
|     // clusters).
 | |
|     nodeExporterSelector: 'job="node"',
 | |
| 
 | |
|     // Select the fstype for filesystem-related queries. If left
 | |
|     // empty, all filesystems are selected. If you have unusual
 | |
|     // filesystem you don't want to include in dashboards and
 | |
|     // alerting, you can exclude them here, e.g. 'fstype!="tmpfs"'.
 | |
|     fsSelector: 'fstype!=""',
 | |
| 
 | |
|     // Select the mountpoint for filesystem-related queries. If left
 | |
|     // empty, all mountpoints are selected. For example if you have a
 | |
|     // special purpose tmpfs instance that has a fixed size and will
 | |
|     // always be 100% full, but you still want alerts and dashboards for
 | |
|     // other tmpfs instances, you can exclude those by mountpoint prefix
 | |
|     // like so: 'mountpoint!~"/var/lib/foo.*"'.
 | |
|     fsMountpointSelector: 'mountpoint!=""',
 | |
| 
 | |
|     // Select the device for disk-related queries. If left empty, all
 | |
|     // devices are selected. If you have unusual devices you don't
 | |
|     // want to include in dashboards and alerting, you can exclude
 | |
|     // them here, e.g. 'device!="tmpfs"'.
 | |
|     diskDeviceSelector: 'device!=""',
 | |
| 
 | |
|     // Some of the alerts are meant to fire if a critical failure of a
 | |
|     // node is imminent (e.g. the disk is about to run full). In a
 | |
|     // true “cloud native” setup, failures of a single node should be
 | |
|     // tolerated. Hence, even imminent failure of a single node is no
 | |
|     // reason to create a paging alert. However, in practice there are
 | |
|     // still many situations where operators like to get paged in time
 | |
|     // before a node runs out of disk space. nodeCriticalSeverity can
 | |
|     // be set to the desired severity for this kind of alerts. This
 | |
|     // can even be templated to depend on labels of the node, e.g. you
 | |
|     // could make this critical for traditional database masters but
 | |
|     // just a warning for K8s nodes.
 | |
|     nodeCriticalSeverity: 'critical',
 | |
| 
 | |
|     // CPU utilization (%) on which to trigger the
 | |
|     // 'NodeCPUHighUsage' alert.
 | |
|     cpuHighUsageThreshold: 90,
 | |
|     // Load average 1m (per core) on which to trigger the
 | |
|     // 'NodeSystemSaturation' alert.
 | |
|     systemSaturationPerCoreThreshold: 2,
 | |
| 
 | |
|     // Available disk space (%) thresholds on which to trigger the
 | |
|     // 'NodeFilesystemSpaceFillingUp' alerts. These alerts fire if the disk
 | |
|     // usage grows in a way that it is predicted to run out in 4h or 1d
 | |
|     // and if the provided thresholds have been reached right now.
 | |
|     // In some cases you'll want to adjust these, e.g., by default, Kubernetes
 | |
|     // runs the image garbage collection when the disk usage reaches 85%
 | |
|     // of its available space. In that case, you'll want to reduce the
 | |
|     // critical threshold below to something like 14 or 15, otherwise
 | |
|     // the alert could fire under normal node usage.
 | |
|     // Additionally, the prediction window for the alert can be configured
 | |
|     // to account for environments where disk usage can fluctuate within
 | |
|     // a short time frame. By extending the prediction window, you can
 | |
|     // reduce false positives caused by temporary spikes, providing a
 | |
|     // more accurate prediction of disk space issues.
 | |
|     fsSpaceFillingUpWarningThreshold: 40,
 | |
|     fsSpaceFillingUpCriticalThreshold: 20,
 | |
|     fsSpaceFillingUpPredictionWindow: '6h',
 | |
| 
 | |
|     // Available disk space (%) thresholds on which to trigger the
 | |
|     // 'NodeFilesystemAlmostOutOfSpace' alerts.
 | |
|     fsSpaceAvailableWarningThreshold: 5,
 | |
|     fsSpaceAvailableCriticalThreshold: 3,
 | |
| 
 | |
|     // Memory utilization (%) level on which to trigger the
 | |
|     // 'NodeMemoryHighUtilization' alert.
 | |
|     memoryHighUtilizationThreshold: 90,
 | |
| 
 | |
|     // Threshold for the rate of memory major page faults to trigger
 | |
|     // 'NodeMemoryMajorPagesFaults' alert.
 | |
|     memoryMajorPagesFaultsThreshold: 500,
 | |
| 
 | |
|     // Disk IO queue level above which to trigger
 | |
|     // 'NodeDiskIOSaturation' alert.
 | |
|     diskIOSaturationThreshold: 10,
 | |
| 
 | |
|     rateInterval: '5m',
 | |
|     // Opt-in for multi-cluster support.
 | |
|     showMultiCluster: false,
 | |
|     clusterLabel: 'cluster',
 | |
| 
 | |
|     dashboardNamePrefix: 'Node Exporter / ',
 | |
|     dashboardTags: ['node-exporter-mixin'],
 | |
|   },
 | |
| }
 |