| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | // Copyright 2019 The Prometheus Authors
 | 
					
						
							|  |  |  | // Licensed under the Apache License, Version 2.0 (the "License");
 | 
					
						
							|  |  |  | // you may not use this file except in compliance with the License.
 | 
					
						
							|  |  |  | // You may obtain a copy of the License at
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // http://www.apache.org/licenses/LICENSE-2.0
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // Unless required by applicable law or agreed to in writing, software
 | 
					
						
							|  |  |  | // distributed under the License is distributed on an "AS IS" BASIS,
 | 
					
						
							|  |  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
					
						
							|  |  |  | // See the License for the specific language governing permissions and
 | 
					
						
							|  |  |  | // limitations under the License.
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-03 04:35:24 -07:00
										 |  |  | //go:build !nopressure
 | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | // +build !nopressure
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package collector | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							| 
									
										
										
										
											2020-09-01 07:21:36 -07:00
										 |  |  | 	"errors" | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | 	"fmt" | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 	"log/slog" | 
					
						
							| 
									
										
										
										
											2020-09-01 07:21:36 -07:00
										 |  |  | 	"os" | 
					
						
							| 
									
										
										
										
											2021-03-01 10:44:21 -08:00
										 |  |  | 	"syscall" | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	"github.com/prometheus/client_golang/prometheus" | 
					
						
							|  |  |  | 	"github.com/prometheus/procfs" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var ( | 
					
						
							| 
									
										
										
										
											2024-06-11 03:31:42 -07:00
										 |  |  | 	psiResources = []string{"cpu", "io", "memory", "irq"} | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type pressureStatsCollector struct { | 
					
						
							|  |  |  | 	cpu     *prometheus.Desc | 
					
						
							|  |  |  | 	io      *prometheus.Desc | 
					
						
							|  |  |  | 	ioFull  *prometheus.Desc | 
					
						
							|  |  |  | 	mem     *prometheus.Desc | 
					
						
							|  |  |  | 	memFull *prometheus.Desc | 
					
						
							| 
									
										
										
										
											2024-06-11 03:31:42 -07:00
										 |  |  | 	irqFull *prometheus.Desc | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	fs procfs.FS | 
					
						
							| 
									
										
										
										
											2019-12-31 08:19:37 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 	logger *slog.Logger | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func init() { | 
					
						
							|  |  |  | 	registerCollector("pressure", defaultEnabled, NewPressureStatsCollector) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // NewPressureStatsCollector returns a Collector exposing pressure stall information
 | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | func NewPressureStatsCollector(logger *slog.Logger) (Collector, error) { | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | 	fs, err := procfs.NewFS(*procPath) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							| 
									
										
										
										
											2019-11-29 05:51:31 -08:00
										 |  |  | 		return nil, fmt.Errorf("failed to open procfs: %w", err) | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return &pressureStatsCollector{ | 
					
						
							|  |  |  | 		cpu: prometheus.NewDesc( | 
					
						
							|  |  |  | 			prometheus.BuildFQName(namespace, "pressure", "cpu_waiting_seconds_total"), | 
					
						
							|  |  |  | 			"Total time in seconds that processes have waited for CPU time", | 
					
						
							|  |  |  | 			nil, nil, | 
					
						
							|  |  |  | 		), | 
					
						
							|  |  |  | 		io: prometheus.NewDesc( | 
					
						
							|  |  |  | 			prometheus.BuildFQName(namespace, "pressure", "io_waiting_seconds_total"), | 
					
						
							|  |  |  | 			"Total time in seconds that processes have waited due to IO congestion", | 
					
						
							|  |  |  | 			nil, nil, | 
					
						
							|  |  |  | 		), | 
					
						
							|  |  |  | 		ioFull: prometheus.NewDesc( | 
					
						
							|  |  |  | 			prometheus.BuildFQName(namespace, "pressure", "io_stalled_seconds_total"), | 
					
						
							|  |  |  | 			"Total time in seconds no process could make progress due to IO congestion", | 
					
						
							|  |  |  | 			nil, nil, | 
					
						
							|  |  |  | 		), | 
					
						
							|  |  |  | 		mem: prometheus.NewDesc( | 
					
						
							|  |  |  | 			prometheus.BuildFQName(namespace, "pressure", "memory_waiting_seconds_total"), | 
					
						
							|  |  |  | 			"Total time in seconds that processes have waited for memory", | 
					
						
							|  |  |  | 			nil, nil, | 
					
						
							|  |  |  | 		), | 
					
						
							|  |  |  | 		memFull: prometheus.NewDesc( | 
					
						
							|  |  |  | 			prometheus.BuildFQName(namespace, "pressure", "memory_stalled_seconds_total"), | 
					
						
							|  |  |  | 			"Total time in seconds no process could make progress due to memory congestion", | 
					
						
							|  |  |  | 			nil, nil, | 
					
						
							|  |  |  | 		), | 
					
						
							| 
									
										
										
										
											2024-06-11 03:31:42 -07:00
										 |  |  | 		irqFull: prometheus.NewDesc( | 
					
						
							|  |  |  | 			prometheus.BuildFQName(namespace, "pressure", "irq_stalled_seconds_total"), | 
					
						
							|  |  |  | 			"Total time in seconds no process could make progress due to IRQ congestion", | 
					
						
							|  |  |  | 			nil, nil, | 
					
						
							|  |  |  | 		), | 
					
						
							| 
									
										
										
										
											2019-12-31 08:19:37 -08:00
										 |  |  | 		fs:     fs, | 
					
						
							|  |  |  | 		logger: logger, | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | 	}, nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Update calls procfs.NewPSIStatsForResource for the different resources and updates the values
 | 
					
						
							|  |  |  | func (c *pressureStatsCollector) Update(ch chan<- prometheus.Metric) error { | 
					
						
							|  |  |  | 	for _, res := range psiResources { | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 		c.logger.Debug("collecting statistics for resource", "resource", res) | 
					
						
							| 
									
										
										
										
											2019-06-12 11:47:16 -07:00
										 |  |  | 		vals, err := c.fs.PSIStatsForResource(res) | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | 		if err != nil { | 
					
						
							| 
									
										
										
										
											2020-09-01 07:21:36 -07:00
										 |  |  | 			if errors.Is(err, os.ErrNotExist) { | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 				c.logger.Debug("pressure information is unavailable, you need a Linux kernel >= 4.20 and/or CONFIG_PSI enabled for your kernel") | 
					
						
							| 
									
										
										
										
											2020-09-01 07:21:36 -07:00
										 |  |  | 				return ErrNoData | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2021-03-01 10:44:21 -08:00
										 |  |  | 			if errors.Is(err, syscall.ENOTSUP) { | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 				c.logger.Debug("pressure information is disabled, add psi=1 kernel command line to enable it") | 
					
						
							| 
									
										
										
										
											2021-03-01 10:44:21 -08:00
										 |  |  | 				return ErrNoData | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2020-09-01 07:21:36 -07:00
										 |  |  | 			return fmt.Errorf("failed to retrieve pressure stats: %w", err) | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2024-06-11 03:31:42 -07:00
										 |  |  | 		// IRQ pressure does not have 'some' data.
 | 
					
						
							| 
									
										
										
										
											2024-06-11 03:42:57 -07:00
										 |  |  | 		// See https://github.com/torvalds/linux/blob/v6.9/include/linux/psi_types.h#L65
 | 
					
						
							| 
									
										
										
										
											2024-06-11 03:31:42 -07:00
										 |  |  | 		if vals.Some == nil && res != "irq" { | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 			c.logger.Debug("pressure information returned no 'some' data") | 
					
						
							| 
									
										
										
										
											2024-05-15 23:38:58 -07:00
										 |  |  | 			return ErrNoData | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2024-06-19 02:16:10 -07:00
										 |  |  | 		if vals.Full == nil && res != "cpu" { | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 			c.logger.Debug("pressure information returned no 'full' data") | 
					
						
							| 
									
										
										
										
											2024-05-15 23:38:58 -07:00
										 |  |  | 			return ErrNoData | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | 		switch res { | 
					
						
							|  |  |  | 		case "cpu": | 
					
						
							|  |  |  | 			ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, float64(vals.Some.Total)/1000.0/1000.0) | 
					
						
							|  |  |  | 		case "io": | 
					
						
							|  |  |  | 			ch <- prometheus.MustNewConstMetric(c.io, prometheus.CounterValue, float64(vals.Some.Total)/1000.0/1000.0) | 
					
						
							|  |  |  | 			ch <- prometheus.MustNewConstMetric(c.ioFull, prometheus.CounterValue, float64(vals.Full.Total)/1000.0/1000.0) | 
					
						
							|  |  |  | 		case "memory": | 
					
						
							|  |  |  | 			ch <- prometheus.MustNewConstMetric(c.mem, prometheus.CounterValue, float64(vals.Some.Total)/1000.0/1000.0) | 
					
						
							|  |  |  | 			ch <- prometheus.MustNewConstMetric(c.memFull, prometheus.CounterValue, float64(vals.Full.Total)/1000.0/1000.0) | 
					
						
							| 
									
										
										
										
											2024-06-11 03:31:42 -07:00
										 |  |  | 		case "irq": | 
					
						
							|  |  |  | 			ch <- prometheus.MustNewConstMetric(c.irqFull, prometheus.CounterValue, float64(vals.Full.Total)/1000.0/1000.0) | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | 		default: | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 			c.logger.Debug("did not account for resource", "resource", res) | 
					
						
							| 
									
										
										
										
											2019-04-18 03:19:20 -07:00
										 |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return nil | 
					
						
							|  |  |  | } |