From b8aac7c92e2d7f3dea56b2d4802d4ef3ee29c3cc Mon Sep 17 00:00:00 2001 From: Pranshu Srivastava Date: Thu, 30 May 2024 13:00:31 +0530 Subject: [PATCH] collector/cpu: Support CPU online status Blocked by: https://github.com/prometheus/procfs/pull/644. Signed-off-by: Pranshu Srivastava --- collector/cpu_linux.go | 61 +++++++++++++++++++++++++++++++++++------- go.mod | 4 +-- go.sum | 8 ++---- 3 files changed, 54 insertions(+), 19 deletions(-) diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index 1ee7b94d..8ca70365 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -17,6 +17,7 @@ package collector import ( + "errors" "fmt" "log/slog" "os" @@ -26,15 +27,17 @@ import ( "strconv" "sync" + "golang.org/x/exp/maps" + "github.com/alecthomas/kingpin/v2" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs" "github.com/prometheus/procfs/sysfs" - "golang.org/x/exp/maps" ) type cpuCollector struct { - fs procfs.FS + procfs procfs.FS + sysfs sysfs.FS cpu *prometheus.Desc cpuInfo *prometheus.Desc cpuFrequencyHz *prometheus.Desc @@ -45,6 +48,7 @@ type cpuCollector struct { cpuPackageThrottle *prometheus.Desc cpuIsolated *prometheus.Desc logger *slog.Logger + cpuOnline *prometheus.Desc cpuStats map[int64]procfs.CPUStat cpuStatsMutex sync.Mutex isolatedCpus []uint16 @@ -70,17 +74,17 @@ func init() { // NewCPUCollector returns a new Collector exposing kernel/system statistics. func NewCPUCollector(logger *slog.Logger) (Collector, error) { - fs, err := procfs.NewFS(*procPath) + pfs, err := procfs.NewFS(*procPath) if err != nil { return nil, fmt.Errorf("failed to open procfs: %w", err) } - sysfs, err := sysfs.NewFS(*sysPath) + sfs, err := sysfs.NewFS(*sysPath) if err != nil { return nil, fmt.Errorf("failed to open sysfs: %w", err) } - isolcpus, err := sysfs.IsolatedCPUs() + isolcpus, err := sfs.IsolatedCPUs() if err != nil { if !os.IsNotExist(err) { return nil, fmt.Errorf("Unable to get isolated cpus: %w", err) @@ -89,8 +93,9 @@ func NewCPUCollector(logger *slog.Logger) (Collector, error) { } c := &cpuCollector{ - fs: fs, - cpu: nodeCPUSecondsDesc, + procfs: pfs, + sysfs: sfs, + cpu: nodeCPUSecondsDesc, cpuInfo: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "info"), "CPU information from /proc/cpuinfo.", @@ -131,6 +136,11 @@ func NewCPUCollector(logger *slog.Logger) (Collector, error) { "Whether each core is isolated, information from /sys/devices/system/cpu/isolated.", []string{"cpu"}, nil, ), + cpuOnline: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "online"), + "CPUs that are online and being scheduled.", + []string{"cpu"}, nil, + ), logger: logger, isolatedCpus: isolcpus, cpuStats: make(map[int64]procfs.CPUStat), @@ -177,12 +187,21 @@ func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error { if c.isolatedCpus != nil { c.updateIsolated(ch) } - return c.updateThermalThrottle(ch) + err := c.updateThermalThrottle(ch) + if err != nil { + return err + } + err = c.updateOnline(ch) + if err != nil { + return err + } + + return nil } // updateInfo reads /proc/cpuinfo func (c *cpuCollector) updateInfo(ch chan<- prometheus.Metric) error { - info, err := c.fs.CPUInfo() + info, err := c.procfs.CPUInfo() if err != nil { return err } @@ -333,9 +352,31 @@ func (c *cpuCollector) updateIsolated(ch chan<- prometheus.Metric) { } } +// updateOnline reads /sys/devices/system/cpu/cpu*/online through sysfs and exports online status metrics. +func (c *cpuCollector) updateOnline(ch chan<- prometheus.Metric) error { + cpus, err := c.sysfs.CPUs() + if err != nil { + return err + } + // No-op if the system does not support CPU online stats. + cpu0 := cpus[0] + if _, err := cpu0.Online(); err != nil && errors.Is(err, os.ErrNotExist) { + return nil + } + for _, cpu := range cpus { + setOnline := float64(0) + if online, _ := cpu.Online(); online { + setOnline = 1 + } + ch <- prometheus.MustNewConstMetric(c.cpuOnline, prometheus.GaugeValue, setOnline, cpu.Number()) + } + + return nil +} + // updateStat reads /proc/stat through procfs and exports CPU-related metrics. func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { - stats, err := c.fs.Stat() + stats, err := c.procfs.Stat() if err != nil { return err } diff --git a/go.mod b/go.mod index cb870e01..4d120b7f 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,6 @@ require ( github.com/coreos/go-systemd/v22 v22.5.0 github.com/dennwc/btrfs v0.0.0-20240418142341-0167142bde7a github.com/ema/qdisc v1.0.0 - github.com/go-kit/log v0.2.1 github.com/godbus/dbus/v5 v5.1.0 github.com/hashicorp/go-envparse v0.1.0 github.com/hodgesds/perf-utils v0.7.0 @@ -27,7 +26,7 @@ require ( github.com/prometheus/client_model v0.6.1 github.com/prometheus/common v0.61.0 github.com/prometheus/exporter-toolkit v0.13.2 - github.com/prometheus/procfs v0.15.1 + github.com/prometheus/procfs v0.15.2-0.20240603130017-1754b780536b // == v0.15.1 + https://github.com/prometheus/procfs/commit/1754b780536bb81082baa913e04cc4fff4d2baea github.com/safchain/ethtool v0.5.9 golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 golang.org/x/sys v0.28.0 @@ -39,7 +38,6 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/dennwc/ioctl v1.0.0 // indirect - github.com/go-logfmt/logfmt v0.5.1 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/klauspost/compress v1.17.9 // indirect diff --git a/go.sum b/go.sum index 2dbc00c7..7bc8a084 100644 --- a/go.sum +++ b/go.sum @@ -21,10 +21,6 @@ github.com/dennwc/ioctl v1.0.0 h1:DsWAAjIxRqNcLn9x6mwfuf2pet3iB7aK90K4tF16rLg= github.com/dennwc/ioctl v1.0.0/go.mod h1:ellh2YB5ldny99SBU/VX7Nq0xiZbHphf1DrtHxxjMk0= github.com/ema/qdisc v1.0.0 h1:EHLG08FVRbWLg8uRICa3xzC9Zm0m7HyMHfXobWFnXYg= github.com/ema/qdisc v1.0.0/go.mod h1:FhIc0fLYi7f+lK5maMsesDqwYojIOh3VfRs8EVd5YJQ= -github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= -github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= -github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA= -github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= @@ -87,8 +83,8 @@ github.com/prometheus/common v0.61.0 h1:3gv/GThfX0cV2lpO7gkTUwZru38mxevy90Bj8YFS github.com/prometheus/common v0.61.0/go.mod h1:zr29OCN/2BsJRaFwG8QOBr41D6kkchKbpeNH7pAjb/s= github.com/prometheus/exporter-toolkit v0.13.2 h1:Z02fYtbqTMy2i/f+xZ+UK5jy/bl1Ex3ndzh06T/Q9DQ= github.com/prometheus/exporter-toolkit v0.13.2/go.mod h1:tCqnfx21q6qN1KA4U3Bfb8uWzXfijIrJz3/kTIqMV7g= -github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= -github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/prometheus/procfs v0.15.2-0.20240603130017-1754b780536b h1:4EJkx3vycI+n5JY5ht+bnSUGamkmmXkpcNeO/OBT/0A= +github.com/prometheus/procfs v0.15.2-0.20240603130017-1754b780536b/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/safchain/ethtool v0.5.9 h1://6RvaOKFf3nQ0rl5+8zBbE4/72455VC9Jq61pfq67E=