mirror of
				https://github.com/prometheus/node_exporter.git
				synced 2025-08-20 18:33:52 -07:00 
			
		
		
		
	Expose /proc/pressure (#1261)
This enables the collection of pressure stall information as exposed by the `/proc/pressure` interface added in the 4.20 release of the Linux kernel. Closes #1174 Signed-off-by: Daniele Sluijters <daenney@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									4e5c4d464f
								
							
						
					
					
						commit
						cc2fd82008
					
				|  | @ -29,6 +29,7 @@ | ||||||
| * [FEATURE] Add uname collector for FreeBSD #1239 | * [FEATURE] Add uname collector for FreeBSD #1239 | ||||||
| * [FEATURE] Add diskstats collector for OpenBSD #1250 | * [FEATURE] Add diskstats collector for OpenBSD #1250 | ||||||
| * [CHANGE] Bonding state uses mii_status #1124 | * [CHANGE] Bonding state uses mii_status #1124 | ||||||
|  | * [FEATURE] Add pressure collector exposing pressure stall information for Linux #1174 | ||||||
| 
 | 
 | ||||||
| ## 0.17.0 / 2018-11-30 | ## 0.17.0 / 2018-11-30 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -73,6 +73,7 @@ logind | Exposes session counts from [logind](http://www.freedesktop.org/wiki/So | ||||||
| meminfo\_numa | Exposes memory statistics from `/proc/meminfo_numa`. | Linux | meminfo\_numa | Exposes memory statistics from `/proc/meminfo_numa`. | Linux | ||||||
| mountstats | Exposes filesystem statistics from `/proc/self/mountstats`. Exposes detailed NFS client statistics. | Linux | mountstats | Exposes filesystem statistics from `/proc/self/mountstats`. Exposes detailed NFS client statistics. | Linux | ||||||
| ntp | Exposes local NTP daemon health to check [time](./docs/TIME.md) | _any_ | ntp | Exposes local NTP daemon health to check [time](./docs/TIME.md) | _any_ | ||||||
|  | pressure | Exposes pressure stall statistics from `/proc/pressure/`. | Linux (kernel 4.20+ and/or [CONFIG\_PSI](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/accounting/psi.txt)) | ||||||
| processes | Exposes aggregate process statistics from `/proc`. | Linux | processes | Exposes aggregate process statistics from `/proc`. | Linux | ||||||
| qdisc | Exposes [queuing discipline](https://en.wikipedia.org/wiki/Network_scheduler#Linux_kernel) statistics | Linux | qdisc | Exposes [queuing discipline](https://en.wikipedia.org/wiki/Network_scheduler#Linux_kernel) statistics | Linux | ||||||
| runit | Exposes service status from [runit](http://smarden.org/runit/). | _any_ | runit | Exposes service status from [runit](http://smarden.org/runit/). | _any_ | ||||||
|  |  | ||||||
|  | @ -2289,6 +2289,21 @@ node_nfsd_server_rpcs_total 18628 | ||||||
| # HELP node_nfsd_server_threads Total number of NFSd kernel threads that are running. | # HELP node_nfsd_server_threads Total number of NFSd kernel threads that are running. | ||||||
| # TYPE node_nfsd_server_threads gauge | # TYPE node_nfsd_server_threads gauge | ||||||
| node_nfsd_server_threads 8 | node_nfsd_server_threads 8 | ||||||
|  | # HELP node_pressure_cpu_waiting_seconds_total Total time in seconds that processes have waited for CPU time | ||||||
|  | # TYPE node_pressure_cpu_waiting_seconds_total counter | ||||||
|  | node_pressure_cpu_waiting_seconds_total 14.036781000000001 | ||||||
|  | # HELP node_pressure_io_stalled_seconds_total Total time in seconds no process could make progress due to IO congestion | ||||||
|  | # TYPE node_pressure_io_stalled_seconds_total counter | ||||||
|  | node_pressure_io_stalled_seconds_total 159.229614 | ||||||
|  | # HELP node_pressure_io_waiting_seconds_total Total time in seconds that processes have waited due to IO congestion | ||||||
|  | # TYPE node_pressure_io_waiting_seconds_total counter | ||||||
|  | node_pressure_io_waiting_seconds_total 159.886802 | ||||||
|  | # HELP node_pressure_memory_stalled_seconds_total Total time in seconds no process could make progress due to memory congestion | ||||||
|  | # TYPE node_pressure_memory_stalled_seconds_total counter | ||||||
|  | node_pressure_memory_stalled_seconds_total 0 | ||||||
|  | # HELP node_pressure_memory_waiting_seconds_total Total time in seconds that processes have waited for memory | ||||||
|  | # TYPE node_pressure_memory_waiting_seconds_total counter | ||||||
|  | node_pressure_memory_waiting_seconds_total 0 | ||||||
| # HELP node_processes_max_processes Number of max PIDs limit | # HELP node_processes_max_processes Number of max PIDs limit | ||||||
| # TYPE node_processes_max_processes gauge | # TYPE node_processes_max_processes gauge | ||||||
| node_processes_max_processes 123 | node_processes_max_processes 123 | ||||||
|  | @ -2361,6 +2376,7 @@ node_scrape_collector_success{collector="netdev"} 1 | ||||||
| node_scrape_collector_success{collector="netstat"} 1 | node_scrape_collector_success{collector="netstat"} 1 | ||||||
| node_scrape_collector_success{collector="nfs"} 1 | node_scrape_collector_success{collector="nfs"} 1 | ||||||
| node_scrape_collector_success{collector="nfsd"} 1 | node_scrape_collector_success{collector="nfsd"} 1 | ||||||
|  | node_scrape_collector_success{collector="pressure"} 1 | ||||||
| node_scrape_collector_success{collector="processes"} 1 | node_scrape_collector_success{collector="processes"} 1 | ||||||
| node_scrape_collector_success{collector="qdisc"} 1 | node_scrape_collector_success{collector="qdisc"} 1 | ||||||
| node_scrape_collector_success{collector="sockstat"} 1 | node_scrape_collector_success{collector="sockstat"} 1 | ||||||
|  |  | ||||||
|  | @ -2289,6 +2289,21 @@ node_nfsd_server_rpcs_total 18628 | ||||||
| # HELP node_nfsd_server_threads Total number of NFSd kernel threads that are running. | # HELP node_nfsd_server_threads Total number of NFSd kernel threads that are running. | ||||||
| # TYPE node_nfsd_server_threads gauge | # TYPE node_nfsd_server_threads gauge | ||||||
| node_nfsd_server_threads 8 | node_nfsd_server_threads 8 | ||||||
|  | # HELP node_pressure_cpu_waiting_seconds_total Total time in seconds that processes have waited for CPU time | ||||||
|  | # TYPE node_pressure_cpu_waiting_seconds_total counter | ||||||
|  | node_pressure_cpu_waiting_seconds_total 14.036781000000001 | ||||||
|  | # HELP node_pressure_io_stalled_seconds_total Total time in seconds no process could make progress due to IO congestion | ||||||
|  | # TYPE node_pressure_io_stalled_seconds_total counter | ||||||
|  | node_pressure_io_stalled_seconds_total 159.229614 | ||||||
|  | # HELP node_pressure_io_waiting_seconds_total Total time in seconds that processes have waited due to IO congestion | ||||||
|  | # TYPE node_pressure_io_waiting_seconds_total counter | ||||||
|  | node_pressure_io_waiting_seconds_total 159.886802 | ||||||
|  | # HELP node_pressure_memory_stalled_seconds_total Total time in seconds no process could make progress due to memory congestion | ||||||
|  | # TYPE node_pressure_memory_stalled_seconds_total counter | ||||||
|  | node_pressure_memory_stalled_seconds_total 0 | ||||||
|  | # HELP node_pressure_memory_waiting_seconds_total Total time in seconds that processes have waited for memory | ||||||
|  | # TYPE node_pressure_memory_waiting_seconds_total counter | ||||||
|  | node_pressure_memory_waiting_seconds_total 0 | ||||||
| # HELP node_processes_max_processes Number of max PIDs limit | # HELP node_processes_max_processes Number of max PIDs limit | ||||||
| # TYPE node_processes_max_processes gauge | # TYPE node_processes_max_processes gauge | ||||||
| node_processes_max_processes 123 | node_processes_max_processes 123 | ||||||
|  | @ -2361,6 +2376,7 @@ node_scrape_collector_success{collector="netdev"} 1 | ||||||
| node_scrape_collector_success{collector="netstat"} 1 | node_scrape_collector_success{collector="netstat"} 1 | ||||||
| node_scrape_collector_success{collector="nfs"} 1 | node_scrape_collector_success{collector="nfs"} 1 | ||||||
| node_scrape_collector_success{collector="nfsd"} 1 | node_scrape_collector_success{collector="nfsd"} 1 | ||||||
|  | node_scrape_collector_success{collector="pressure"} 1 | ||||||
| node_scrape_collector_success{collector="processes"} 1 | node_scrape_collector_success{collector="processes"} 1 | ||||||
| node_scrape_collector_success{collector="qdisc"} 1 | node_scrape_collector_success{collector="qdisc"} 1 | ||||||
| node_scrape_collector_success{collector="sockstat"} 1 | node_scrape_collector_success{collector="sockstat"} 1 | ||||||
|  |  | ||||||
							
								
								
									
										1
									
								
								collector/fixtures/proc/pressure/cpu
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								collector/fixtures/proc/pressure/cpu
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1 @@ | ||||||
|  | some avg10=0.00 avg60=0.00 avg300=0.00 total=14036781 | ||||||
							
								
								
									
										2
									
								
								collector/fixtures/proc/pressure/io
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								collector/fixtures/proc/pressure/io
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,2 @@ | ||||||
|  | some avg10=0.18 avg60=0.34 avg300=0.10 total=159886802 | ||||||
|  | full avg10=0.18 avg60=0.34 avg300=0.10 total=159229614 | ||||||
							
								
								
									
										2
									
								
								collector/fixtures/proc/pressure/memory
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								collector/fixtures/proc/pressure/memory
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,2 @@ | ||||||
|  | some avg10=0.00 avg60=0.00 avg300=0.00 total=0 | ||||||
|  | full avg10=0.00 avg60=0.00 avg300=0.00 total=0 | ||||||
							
								
								
									
										105
									
								
								collector/pressure_linux.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										105
									
								
								collector/pressure_linux.go
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,105 @@ | ||||||
|  | // Copyright 2019 The Prometheus Authors
 | ||||||
|  | // Licensed under the Apache License, Version 2.0 (the "License");
 | ||||||
|  | // you may not use this file except in compliance with the License.
 | ||||||
|  | // You may obtain a copy of the License at
 | ||||||
|  | //
 | ||||||
|  | // http://www.apache.org/licenses/LICENSE-2.0
 | ||||||
|  | //
 | ||||||
|  | // Unless required by applicable law or agreed to in writing, software
 | ||||||
|  | // distributed under the License is distributed on an "AS IS" BASIS,
 | ||||||
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | ||||||
|  | // See the License for the specific language governing permissions and
 | ||||||
|  | // limitations under the License.
 | ||||||
|  | 
 | ||||||
|  | // +build !nopressure
 | ||||||
|  | 
 | ||||||
|  | package collector | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"fmt" | ||||||
|  | 
 | ||||||
|  | 	"github.com/prometheus/client_golang/prometheus" | ||||||
|  | 	"github.com/prometheus/common/log" | ||||||
|  | 	"github.com/prometheus/procfs" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
// psiResources lists the resources whose pressure stall statistics are
// collected; Update iterates over them in this order.
var psiResources = []string{"cpu", "io", "memory"}
|  | 
 | ||||||
|  | type pressureStatsCollector struct { | ||||||
|  | 	cpu     *prometheus.Desc | ||||||
|  | 	io      *prometheus.Desc | ||||||
|  | 	ioFull  *prometheus.Desc | ||||||
|  | 	mem     *prometheus.Desc | ||||||
|  | 	memFull *prometheus.Desc | ||||||
|  | 
 | ||||||
|  | 	fs procfs.FS | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func init() { | ||||||
|  | 	registerCollector("pressure", defaultEnabled, NewPressureStatsCollector) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // NewPressureStatsCollector returns a Collector exposing pressure stall information
 | ||||||
|  | func NewPressureStatsCollector() (Collector, error) { | ||||||
|  | 	fs, err := procfs.NewFS(*procPath) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, fmt.Errorf("failed to open procfs: %v", err) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return &pressureStatsCollector{ | ||||||
|  | 		cpu: prometheus.NewDesc( | ||||||
|  | 			prometheus.BuildFQName(namespace, "pressure", "cpu_waiting_seconds_total"), | ||||||
|  | 			"Total time in seconds that processes have waited for CPU time", | ||||||
|  | 			nil, nil, | ||||||
|  | 		), | ||||||
|  | 		io: prometheus.NewDesc( | ||||||
|  | 			prometheus.BuildFQName(namespace, "pressure", "io_waiting_seconds_total"), | ||||||
|  | 			"Total time in seconds that processes have waited due to IO congestion", | ||||||
|  | 			nil, nil, | ||||||
|  | 		), | ||||||
|  | 		ioFull: prometheus.NewDesc( | ||||||
|  | 			prometheus.BuildFQName(namespace, "pressure", "io_stalled_seconds_total"), | ||||||
|  | 			"Total time in seconds no process could make progress due to IO congestion", | ||||||
|  | 			nil, nil, | ||||||
|  | 		), | ||||||
|  | 		mem: prometheus.NewDesc( | ||||||
|  | 			prometheus.BuildFQName(namespace, "pressure", "memory_waiting_seconds_total"), | ||||||
|  | 			"Total time in seconds that processes have waited for memory", | ||||||
|  | 			nil, nil, | ||||||
|  | 		), | ||||||
|  | 		memFull: prometheus.NewDesc( | ||||||
|  | 			prometheus.BuildFQName(namespace, "pressure", "memory_stalled_seconds_total"), | ||||||
|  | 			"Total time in seconds no process could make progress due to memory congestion", | ||||||
|  | 			nil, nil, | ||||||
|  | 		), | ||||||
|  | 		fs: fs, | ||||||
|  | 	}, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Update calls procfs.NewPSIStatsForResource for the different resources and updates the values
 | ||||||
|  | func (c *pressureStatsCollector) Update(ch chan<- prometheus.Metric) error { | ||||||
|  | 	for _, res := range psiResources { | ||||||
|  | 		log.Debugf("collecting statistics for resource: %s", res) | ||||||
|  | 		vals, err := c.fs.NewPSIStatsForResource(res) | ||||||
|  | 		if err != nil { | ||||||
|  | 			log.Debug("pressure information is unavailable, you need a Linux kernel >= 4.20 and/or CONFIG_PSI enabled for your kernel") | ||||||
|  | 			return nil | ||||||
|  | 		} | ||||||
|  | 		switch res { | ||||||
|  | 		case "cpu": | ||||||
|  | 			ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, float64(vals.Some.Total)/1000.0/1000.0) | ||||||
|  | 		case "io": | ||||||
|  | 			ch <- prometheus.MustNewConstMetric(c.io, prometheus.CounterValue, float64(vals.Some.Total)/1000.0/1000.0) | ||||||
|  | 			ch <- prometheus.MustNewConstMetric(c.ioFull, prometheus.CounterValue, float64(vals.Full.Total)/1000.0/1000.0) | ||||||
|  | 		case "memory": | ||||||
|  | 			ch <- prometheus.MustNewConstMetric(c.mem, prometheus.CounterValue, float64(vals.Some.Total)/1000.0/1000.0) | ||||||
|  | 			ch <- prometheus.MustNewConstMetric(c.memFull, prometheus.CounterValue, float64(vals.Full.Total)/1000.0/1000.0) | ||||||
|  | 		default: | ||||||
|  | 			log.Debugf("did not account for resource: %s", res) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  | @ -28,6 +28,7 @@ enabled_collectors=$(cat << COLLECTORS | ||||||
|   netstat |   netstat | ||||||
|   nfs |   nfs | ||||||
|   nfsd |   nfsd | ||||||
|  |   pressure | ||||||
|   qdisc |   qdisc | ||||||
|   sockstat |   sockstat | ||||||
|   stat |   stat | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue