mirror of
https://github.com/prometheus/node_exporter.git
synced 2024-12-28 06:59:44 -08:00
cpu: Metric 'package_throttles_total' is per package. (#657)
* cpu: Metric 'package_throttles_total' is per package.
  'package_throttles_total' is per package, not per cpu. This also reduces the total number of cpu time series a lot (especially for multi-core CPUs).
* cpu: Better handling of a cpulist edge-case.
* cpu: Extract the package number from the directory name. Do not rely on the range index.
* cpu: Add package_throttle_count for node0 cpu1. This file must be ignored by the cpu collector.
This commit is contained in:
parent
abb58a31e2
commit
b0d5c00832
|
@ -17,8 +17,11 @@ package collector
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/prometheus/common/log"
|
"github.com/prometheus/common/log"
|
||||||
|
@ -29,6 +32,10 @@ const (
|
||||||
cpuCollectorNamespace = "cpu"
|
cpuCollectorNamespace = "cpu"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
digitRegexp = regexp.MustCompile("[0-9]+")
|
||||||
|
)
|
||||||
|
|
||||||
type cpuCollector struct {
|
type cpuCollector struct {
|
||||||
cpu *prometheus.Desc
|
cpu *prometheus.Desc
|
||||||
cpuFreq *prometheus.Desc
|
cpuFreq *prometheus.Desc
|
||||||
|
@ -65,6 +72,7 @@ func NewCPUCollector() (Collector, error) {
|
||||||
"Maximum cpu thread frequency in hertz.",
|
"Maximum cpu thread frequency in hertz.",
|
||||||
[]string{"cpu"}, nil,
|
[]string{"cpu"}, nil,
|
||||||
),
|
),
|
||||||
|
// FIXME: This should be a per core metric, not per cpu!
|
||||||
cpuCoreThrottle: prometheus.NewDesc(
|
cpuCoreThrottle: prometheus.NewDesc(
|
||||||
prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "core_throttles_total"),
|
prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "core_throttles_total"),
|
||||||
"Number of times this cpu core has been throttled.",
|
"Number of times this cpu core has been throttled.",
|
||||||
|
@ -73,7 +81,7 @@ func NewCPUCollector() (Collector, error) {
|
||||||
cpuPackageThrottle: prometheus.NewDesc(
|
cpuPackageThrottle: prometheus.NewDesc(
|
||||||
prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "package_throttles_total"),
|
prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "package_throttles_total"),
|
||||||
"Number of times this cpu package has been throttled.",
|
"Number of times this cpu package has been throttled.",
|
||||||
[]string{"cpu"}, nil,
|
[]string{"node"}, nil,
|
||||||
),
|
),
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
@ -98,6 +106,7 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
|
||||||
|
|
||||||
var value uint64
|
var value uint64
|
||||||
|
|
||||||
|
// cpu loop
|
||||||
for _, cpu := range cpus {
|
for _, cpu := range cpus {
|
||||||
_, cpuname := filepath.Split(cpu)
|
_, cpuname := filepath.Split(cpu)
|
||||||
|
|
||||||
|
@ -106,17 +115,17 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
|
||||||
} else {
|
} else {
|
||||||
// sysfs cpufreq values are kHz, thus multiply by 1000 to export base units (hz).
|
// sysfs cpufreq values are kHz, thus multiply by 1000 to export base units (hz).
|
||||||
// See https://www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt
|
// See https://www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt
|
||||||
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_cur_freq")); err != nil {
|
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq", "scaling_cur_freq")); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
ch <- prometheus.MustNewConstMetric(c.cpuFreq, prometheus.GaugeValue, float64(value)*1000.0, cpuname)
|
ch <- prometheus.MustNewConstMetric(c.cpuFreq, prometheus.GaugeValue, float64(value)*1000.0, cpuname)
|
||||||
|
|
||||||
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_min_freq")); err != nil {
|
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq", "scaling_min_freq")); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
ch <- prometheus.MustNewConstMetric(c.cpuFreqMin, prometheus.GaugeValue, float64(value)*1000.0, cpuname)
|
ch <- prometheus.MustNewConstMetric(c.cpuFreqMin, prometheus.GaugeValue, float64(value)*1000.0, cpuname)
|
||||||
|
|
||||||
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_max_freq")); err != nil {
|
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq", "scaling_max_freq")); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
ch <- prometheus.MustNewConstMetric(c.cpuFreqMax, prometheus.GaugeValue, float64(value)*1000.0, cpuname)
|
ch <- prometheus.MustNewConstMetric(c.cpuFreqMax, prometheus.GaugeValue, float64(value)*1000.0, cpuname)
|
||||||
|
@ -124,17 +133,44 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
|
||||||
|
|
||||||
if _, err := os.Stat(filepath.Join(cpu, "thermal_throttle")); os.IsNotExist(err) {
|
if _, err := os.Stat(filepath.Join(cpu, "thermal_throttle")); os.IsNotExist(err) {
|
||||||
log.Debugf("CPU %q is missing thermal_throttle", cpu)
|
log.Debugf("CPU %q is missing thermal_throttle", cpu)
|
||||||
} else {
|
continue
|
||||||
if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle/core_throttle_count")); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname)
|
|
||||||
|
|
||||||
if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle/package_throttle_count")); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), cpuname)
|
|
||||||
}
|
}
|
||||||
|
if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname)
|
||||||
|
}
|
||||||
|
|
||||||
|
pkgs, err := filepath.Glob(sysFilePath("bus/node/devices/node[0-9]*"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// package/node loop
|
||||||
|
for _, pkg := range pkgs {
|
||||||
|
if _, err := os.Stat(filepath.Join(pkg, "cpulist")); os.IsNotExist(err) {
|
||||||
|
log.Debugf("package %q is missing cpulist", pkg)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
cpulist, err := ioutil.ReadFile(filepath.Join(pkg, "cpulist"))
|
||||||
|
if err != nil {
|
||||||
|
log.Debugf("could not read cpulist of package %q", pkg)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// cpulist example of one package/node with HT: "0-11,24-35"
|
||||||
|
line := strings.Split(string(cpulist), "\n")[0]
|
||||||
|
firstCPU := strings.FieldsFunc(line, func(r rune) bool {
|
||||||
|
return r == '-' || r == ','
|
||||||
|
})[0]
|
||||||
|
if _, err := os.Stat(filepath.Join(pkg, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); os.IsNotExist(err) {
|
||||||
|
log.Debugf("Package %q CPU %q is missing package_throttle", pkg, firstCPU)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if value, err = readUintFromFile(filepath.Join(pkg, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
pkgno := digitRegexp.FindAllString(pkg, 1)[0]
|
||||||
|
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), pkgno)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -299,9 +299,7 @@ node_cpu_frequency_min_hertz{cpu="cpu1"} 8e+08
|
||||||
node_cpu_frequency_min_hertz{cpu="cpu3"} 1e+06
|
node_cpu_frequency_min_hertz{cpu="cpu3"} 1e+06
|
||||||
# HELP node_cpu_package_throttles_total Number of times this cpu package has been throttled.
|
# HELP node_cpu_package_throttles_total Number of times this cpu package has been throttled.
|
||||||
# TYPE node_cpu_package_throttles_total counter
|
# TYPE node_cpu_package_throttles_total counter
|
||||||
node_cpu_package_throttles_total{cpu="cpu0"} 30
|
node_cpu_package_throttles_total{node="0"} 30
|
||||||
node_cpu_package_throttles_total{cpu="cpu1"} 30
|
|
||||||
node_cpu_package_throttles_total{cpu="cpu2"} 6
|
|
||||||
# HELP node_disk_bytes_read The total number of bytes read successfully.
|
# HELP node_disk_bytes_read The total number of bytes read successfully.
|
||||||
# TYPE node_disk_bytes_read counter
|
# TYPE node_disk_bytes_read counter
|
||||||
node_disk_bytes_read{device="dm-0"} 5.13708655616e+11
|
node_disk_bytes_read{device="dm-0"} 5.13708655616e+11
|
||||||
|
|
|
@ -116,6 +116,42 @@ Lines: 1
|
||||||
1000
|
1000
|
||||||
Mode: 644
|
Mode: 644
|
||||||
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
|
Directory: sys/bus/node
|
||||||
|
Mode: 755
|
||||||
|
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
|
Directory: sys/bus/node/devices
|
||||||
|
Mode: 755
|
||||||
|
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
|
Directory: sys/bus/node/devices/node0
|
||||||
|
Mode: 755
|
||||||
|
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
|
Directory: sys/bus/node/devices/node0/cpu0
|
||||||
|
Mode: 755
|
||||||
|
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
|
Directory: sys/bus/node/devices/node0/cpu0/thermal_throttle
|
||||||
|
Mode: 755
|
||||||
|
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
|
Path: sys/bus/node/devices/node0/cpu0/thermal_throttle/package_throttle_count
|
||||||
|
Lines: 1
|
||||||
|
30
|
||||||
|
Mode: 644
|
||||||
|
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
|
Directory: sys/bus/node/devices/node0/cpu1
|
||||||
|
Mode: 755
|
||||||
|
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
|
Directory: sys/bus/node/devices/node0/cpu1/thermal_throttle
|
||||||
|
Mode: 755
|
||||||
|
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
|
Path: sys/bus/node/devices/node0/cpu1/thermal_throttle/package_throttle_count
|
||||||
|
Lines: 1
|
||||||
|
30
|
||||||
|
Mode: 644
|
||||||
|
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
|
Path: sys/bus/node/devices/node0/cpulist
|
||||||
|
Lines: 1
|
||||||
|
0-3
|
||||||
|
Mode: 644
|
||||||
|
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
Directory: sys/class
|
Directory: sys/class
|
||||||
Mode: 755
|
Mode: 755
|
||||||
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
|
|
Loading…
Reference in a new issue