From 512e086dec6fd5d3ebc30974066f69f69b1f85b9 Mon Sep 17 00:00:00 2001 From: david Date: Wed, 15 Dec 2021 12:07:38 +0100 Subject: [PATCH] Implement #2250: Add "isolated" label on cpu collector on linux Signed-off-by: david --- collector/cpu_linux.go | 99 +++++++++++++++++++++++++++++++------ collector/cpu_linux_test.go | 28 +++++++++++ 2 files changed, 113 insertions(+), 14 deletions(-) diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index 84ad373b..eec6c4eb 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -18,9 +18,11 @@ package collector import ( "fmt" + "io/ioutil" "path/filepath" "regexp" "strconv" + "strings" "sync" "github.com/go-kit/log" @@ -42,6 +44,7 @@ type cpuCollector struct { logger log.Logger cpuStats []procfs.CPUStat cpuStatsMutex sync.Mutex + isolatedCpus []uint16 cpuFlagsIncludeRegexp *regexp.Regexp cpuBugsIncludeRegexp *regexp.Regexp @@ -62,15 +65,69 @@ func init() { registerCollector("cpu", defaultEnabled, NewCPUCollector) } +func parseIsolCpus(data []byte) ([]uint16, error) { + isolcpus_str := strings.TrimRight(string(data), "\n") + + var isolcpus_int = []uint16{} + + for _, cpu := range strings.Split(isolcpus_str, ",") { + if cpu == "" { + continue + } + if strings.Contains(cpu, "-") { + ranges := strings.Split(cpu, "-") + startRange, err := strconv.Atoi(ranges[0]) + if err != nil { + return nil, err + } + endRange, err := strconv.Atoi(ranges[1]) + if err != nil { + return nil, err + } + + for i := startRange; i <= endRange; i++ { + isolcpus_int = append(isolcpus_int, uint16(i)) + } + continue + } + + _cpu, err := strconv.Atoi(cpu) + if err != nil { + return nil, err + } + isolcpus_int = append(isolcpus_int, uint16(_cpu)) + } + return isolcpus_int, nil +} + +func readIsolCpus() ([]uint16, error) { + isolcpus, err := ioutil.ReadFile(sysFilePath("devices/system/cpu/isolated")) + if err != nil { + return nil, fmt.Errorf("failed to read isolcpus from sysfs: %w", err) + } + + return parseIsolCpus(isolcpus) +} + // NewCPUCollector returns a new Collector exposing kernel/system statistics. func NewCPUCollector(logger log.Logger) (Collector, error) { fs, err := procfs.NewFS(*procPath) if err != nil { return nil, fmt.Errorf("failed to open procfs: %w", err) } + + isolcpus, err := readIsolCpus() + if err != nil { + return nil, fmt.Errorf("failed to open procfs: %w", err) + } + c := &cpuCollector{ - fs: fs, - cpu: nodeCPUSecondsDesc, + fs: fs, + cpu: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "seconds_total"), + "Seconds the CPUs spent in each mode.", + []string{"cpu", "mode", "isolated"}, nil, + ), cpuInfo: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "info"), "CPU information from /proc/cpuinfo.", @@ -89,7 +146,7 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { cpuGuest: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"), "Seconds the CPUs spent in guests (VMs) for each mode.", - []string{"cpu", "mode"}, nil, + []string{"cpu", "mode", "isolated"}, nil, ), cpuCoreThrottle: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "core_throttles_total"), @@ -101,7 +158,8 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { "Number of times this CPU package has been throttled.", []string{"package"}, nil, ), - logger: logger, + logger: logger, + isolatedCpus: isolcpus, } err = c.compileIncludeFlags(flagsInclude, bugsInclude) if err != nil { @@ -276,6 +334,15 @@ func (c *cpuCollector) updateThermalThrottle(ch chan<- prometheus.Metric) error return nil } +func contains(s []uint16, e uint16) bool { + for _, a := range s { + if a == e { + return true + } + } + return false +} + // updateStat reads /proc/stat through procfs and exports CPU-related metrics. func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { stats, err := c.fs.Stat() @@ -290,19 +357,23 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { defer c.cpuStatsMutex.Unlock() for cpuID, cpuStat := range c.cpuStats { cpuNum := strconv.Itoa(cpuID) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuNum, "idle") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuNum, "iowait") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuNum, "irq") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, "softirq") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal") + isIsolated := "0" + if contains(c.isolatedCpus, uint16(cpuID)) { + isIsolated = "1" + } + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuNum, "idle", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuNum, "iowait", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuNum, "irq", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, "softirq", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal", isIsolated) if *enableCPUGuest { // Guest CPU is also accounted for in cpuStat.User and cpuStat.Nice, expose these as separate metrics. - ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user") - ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.GuestNice, cpuNum, "nice") + ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.GuestNice, cpuNum, "nice", isIsolated) } } diff --git a/collector/cpu_linux_test.go b/collector/cpu_linux_test.go index 93b493b2..7bcd5e49 100644 --- a/collector/cpu_linux_test.go +++ b/collector/cpu_linux_test.go @@ -104,3 +104,31 @@ func TestCPU(t *testing.T) { t.Fatalf("should have %v CPU Stat: got %v", resetIdle, got) } } +func TestIsolatedParsingCPU(t *testing.T) { + var testParams = []struct { + in []byte + res []uint16 + err error + }{ + {[]byte(""), []uint16{}, nil}, + {[]byte("1\n"), []uint16{1}, nil}, + {[]byte("1"), []uint16{1}, nil}, + {[]byte("1,2"), []uint16{1, 2}, nil}, + {[]byte("1-2"), []uint16{1, 2}, nil}, + {[]byte("1-3"), []uint16{1, 2, 3}, nil}, + {[]byte("1,2-4"), []uint16{1, 2, 3, 4}, nil}, + {[]byte("1,3-4"), []uint16{1, 3, 4}, nil}, + {[]byte("1,3-4,7,20-21"), []uint16{1, 3, 4, 7, 20, 21}, nil}, + } + for _, params := range testParams { + t.Run("blabla", func(t *testing.T) { + res, err := parseIsolCpus(params.in) + if !reflect.DeepEqual(res, params.res) { + t.Fatalf("should have %v result: got %v", params.res, res) + } + if err != params.err { + t.Fatalf("should have %v error: got %v", params.err, err) + } + }) + } +}