mirror of
				https://github.com/prometheus/node_exporter.git
				synced 2025-08-20 18:33:52 -07:00 
			
		
		
		
	Add thresholds for memory alerts
Signed-off-by: Vitaly Zhuravlev <v-zhuravlev@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									2111e70ac7
								
							
						
					
					
						commit
						77ae769179
					
				|  | @ -344,7 +344,7 @@ | ||||||
|           { |           { | ||||||
|             alert: 'NodeMemoryMajorPagesFaults', |             alert: 'NodeMemoryMajorPagesFaults', | ||||||
|             expr: ||| |             expr: ||| | ||||||
|               rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[5m]) > 500 |               rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[5m]) > %(memoryMajorPagesFaultsWarningThreshold)s | ||||||
|             ||| % $._config, |             ||| % $._config, | ||||||
|             'for': '15m', |             'for': '15m', | ||||||
|             labels: { |             labels: { | ||||||
|  | @ -353,15 +353,15 @@ | ||||||
|             annotations: { |             annotations: { | ||||||
|               summary: 'Memory major page faults are occurring at very high rate.', |               summary: 'Memory major page faults are occurring at very high rate.', | ||||||
|               description: ||| |               description: ||| | ||||||
|                 Memory major pages are occurring at very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. |                 Memory major pages are occurring at very high rate at {{ $labels.instance }}, %(memoryMajorPagesFaultsWarningThreshold)s major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. | ||||||
|                 Please check that there is enough memory available at this instance. |                 Please check that there is enough memory available at this instance. | ||||||
|               |||, |               ||| % $._config, | ||||||
|             }, |             }, | ||||||
|           }, |           }, | ||||||
|           { |           { | ||||||
|             alert: 'NodeMemoryHighUtilization', |             alert: 'NodeMemoryHighUtilization', | ||||||
|             expr: ||| |             expr: ||| | ||||||
|               100 - (node_memory_MemAvailable_bytes{%(nodeExporterSelector)s} / node_memory_MemTotal_bytes{%(nodeExporterSelector)s} * 100) > 90 |               100 - (node_memory_MemAvailable_bytes{%(nodeExporterSelector)s} / node_memory_MemTotal_bytes{%(nodeExporterSelector)s} * 100) > %(memoryHighUtilizationThreshold)s | ||||||
|             ||| % $._config, |             ||| % $._config, | ||||||
|             'for': '15m', |             'for': '15m', | ||||||
|             labels: { |             labels: { | ||||||
|  | @ -370,7 +370,7 @@ | ||||||
|             annotations: { |             annotations: { | ||||||
|               summary: 'Host is running out of memory.', |               summary: 'Host is running out of memory.', | ||||||
|               description: ||| |               description: ||| | ||||||
|                 Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. |                 Memory is filling up at {{ $labels.instance }}, has been above %(memoryHighUtilizationThreshold)s% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. | ||||||
|               |||, |               |||, | ||||||
|             }, |             }, | ||||||
|           }, |           }, | ||||||
|  |  | ||||||
|  | @ -60,6 +60,14 @@ | ||||||
|     fsSpaceAvailableWarningThreshold: 5, |     fsSpaceAvailableWarningThreshold: 5, | ||||||
|     fsSpaceAvailableCriticalThreshold: 3, |     fsSpaceAvailableCriticalThreshold: 3, | ||||||
| 
 | 
 | ||||||
|  |     // Memory utilization (%) level on which to trigger the | ||||||
|  |     // 'NodeMemoryHighUtilization' alert. | ||||||
|  |     memoryHighUtilizationThreshold: 90, | ||||||
|  | 
 | ||||||
|  |     // Threshold for the rate of memory major page faults to trigger | ||||||
|  |     // 'NodeMemoryMajorPagesFaults' alert. | ||||||
|  |     memoryMajorPagesFaultsWarningThreshold: 500, | ||||||
|  | 
 | ||||||
|     rateInterval: '5m', |     rateInterval: '5m', | ||||||
|     // Opt-in for multi-cluster support. |     // Opt-in for multi-cluster support. | ||||||
|     showMultiCluster: false, |     showMultiCluster: false, | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue