From c504c7e2649aec533678cc83d65230f4237a0c04 Mon Sep 17 00:00:00 2001 From: Rene Treffer Date: Tue, 27 Feb 2018 19:43:15 +0100 Subject: [PATCH] Only report core throttles per core, not per cpu (#836) * Only report core throttles per core, not per cpu * Add topology/core_id to the cpu sysfs fixtures * Add new cpu fixtures to ttar file * Merge core_id reading and thermal throttle accounting * Declare core_id --- collector/cpu_linux.go | 23 ++++++++++++++++----- collector/fixtures/e2e-output.txt | 5 ++--- collector/fixtures/sys.ttar | 34 ++++++++++++++++++++++++++++++- 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index 4baf0ee1..cd18bd90 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -21,6 +21,7 @@ import ( "os" "path/filepath" "regexp" + "strconv" "strings" "github.com/prometheus/client_golang/prometheus" @@ -70,11 +71,10 @@ func NewCPUCollector() (Collector, error) { "Maximum cpu thread frequency in hertz.", []string{"cpu"}, nil, ), - // FIXME: This should be a per core metric, not per cpu! cpuCoreThrottle: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "core_throttles_total"), "Number of times this cpu core has been throttled.", - []string{"cpu"}, nil, + []string{"core"}, nil, ), cpuPackageThrottle: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "package_throttles_total"), @@ -104,6 +104,8 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error { var value uint64 + cpu_core_throttles := make(map[int]uint64) + // cpu loop for _, cpu := range cpus { _, cpuName := filepath.Split(cpu) @@ -134,10 +136,21 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error { log.Debugf("CPU %v is missing thermal_throttle", cpu) continue } - if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err != nil { - return err + + if value, err := readUintFromFile(filepath.Join(cpu, "topology/core_id")); err != nil { + log.Debugf("CPU %v is misssing topology/core_id", cpu) + } else { + core_id := int(value) + if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err != nil { + return err + } + cpu_core_throttles[core_id] = value } - ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuNum) + } + + // core throttles + for core_id, value := range cpu_core_throttles { + ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), strconv.Itoa(core_id)) } nodes, err := filepath.Glob(sysFilePath("bus/node/devices/node[0-9]*")) diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 26ac840f..1ad4afd3 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -180,9 +180,8 @@ node_buddyinfo_blocks{node="0",size="9",zone="Normal"} 0 node_context_switches_total 3.8014093e+07 # HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled. # TYPE node_cpu_core_throttles_total counter -node_cpu_core_throttles_total{cpu="0"} 5 -node_cpu_core_throttles_total{cpu="1"} 0 -node_cpu_core_throttles_total{cpu="2"} 40 +node_cpu_core_throttles_total{core="0"} 5 +node_cpu_core_throttles_total{core="1"} 0 # HELP node_cpu_frequency_hertz Current cpu thread frequency in hertz. # TYPE node_cpu_frequency_hertz gauge node_cpu_frequency_hertz{cpu="0"} 1.699981e+09 diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index 199f6280..03f8fb9b 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -45,6 +45,14 @@ Lines: 1 30 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/bus/cpu/devices/cpu0/topology +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/bus/cpu/devices/cpu0/topology/core_id +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/bus/cpu/devices/cpu1 Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -79,6 +87,14 @@ Lines: 1 30 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/bus/cpu/devices/cpu1/topology +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/bus/cpu/devices/cpu1/topology/core_id +Lines: 1 +1 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/bus/cpu/devices/cpu2 Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -87,7 +103,7 @@ Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/bus/cpu/devices/cpu2/thermal_throttle/core_throttle_count Lines: 1 -40 +5 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/bus/cpu/devices/cpu2/thermal_throttle/package_throttle_count @@ -95,6 +111,14 @@ Lines: 1 6 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/bus/cpu/devices/cpu2/topology +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/bus/cpu/devices/cpu2/topology/core_id +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/bus/cpu/devices/cpu3 Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -116,6 +140,14 @@ Lines: 1 1000 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/bus/cpu/devices/cpu3/topology +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/bus/cpu/devices/cpu3/topology/core_id +Lines: 1 +1 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/bus/node Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -