mirror of
https://github.com/prometheus/node_exporter.git
synced 2024-12-27 14:39:53 -08:00
Expose cpu bugs and flags as info metrics. (#1788)
* Expose cpu bugs and flags as info metrics with a regexp filter.
* Automatically enable CPU info metrics when using the flags or bugs feature.

Signed-off-by: domgoer <domdoumc@gmail.com>
This commit is contained in:
parent
f4b89c79a2
commit
503e4fc848
|
@ -18,6 +18,7 @@ package collector
|
|||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
|
@ -32,16 +33,23 @@ type cpuCollector struct {
|
|||
fs procfs.FS
|
||||
cpu *prometheus.Desc
|
||||
cpuInfo *prometheus.Desc
|
||||
cpuFlagsInfo *prometheus.Desc
|
||||
cpuBugsInfo *prometheus.Desc
|
||||
cpuGuest *prometheus.Desc
|
||||
cpuCoreThrottle *prometheus.Desc
|
||||
cpuPackageThrottle *prometheus.Desc
|
||||
logger log.Logger
|
||||
cpuStats []procfs.CPUStat
|
||||
cpuStatsMutex sync.Mutex
|
||||
|
||||
cpuFlagsIncludeRegexp *regexp.Regexp
|
||||
cpuBugsIncludeRegexp *regexp.Regexp
|
||||
}
|
||||
|
||||
var (
|
||||
enableCPUInfo = kingpin.Flag("collector.cpu.info", "Enables metric cpu_info").Bool()
|
||||
flagsInclude = kingpin.Flag("collector.cpu.info.flags-include", "Filter the `flags` field in cpuInfo with a value that must be a regular expression").String()
|
||||
bugsInclude = kingpin.Flag("collector.cpu.info.bugs-include", "Filter the `bugs` field in cpuInfo with a value that must be a regular expression").String()
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -54,7 +62,7 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
|
|||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open procfs: %w", err)
|
||||
}
|
||||
return &cpuCollector{
|
||||
c := &cpuCollector{
|
||||
fs: fs,
|
||||
cpu: nodeCPUSecondsDesc,
|
||||
cpuInfo: prometheus.NewDesc(
|
||||
|
@ -62,6 +70,16 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
|
|||
"CPU information from /proc/cpuinfo.",
|
||||
[]string{"package", "core", "cpu", "vendor", "family", "model", "model_name", "microcode", "stepping", "cachesize"}, nil,
|
||||
),
|
||||
cpuFlagsInfo: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "flag_info"),
|
||||
"The `flags` field of CPU information from /proc/cpuinfo.",
|
||||
[]string{"flag"}, nil,
|
||||
),
|
||||
cpuBugsInfo: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "bug_info"),
|
||||
"The `bugs` field of CPU information from /proc/cpuinfo.",
|
||||
[]string{"bug"}, nil,
|
||||
),
|
||||
cpuGuest: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"),
|
||||
"Seconds the cpus spent in guests (VMs) for each mode.",
|
||||
|
@ -78,7 +96,34 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
|
|||
[]string{"package"}, nil,
|
||||
),
|
||||
logger: logger,
|
||||
}, nil
|
||||
}
|
||||
err = c.compileIncludeFlags(flagsInclude, bugsInclude)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("fail to compile --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include, the values of them must be regular expressions: %w", err)
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *cpuCollector) compileIncludeFlags(flagsIncludeFlag, bugsIncludeFlag *string) error {
|
||||
if (*flagsIncludeFlag != "" || *bugsIncludeFlag != "") && !*enableCPUInfo {
|
||||
*enableCPUInfo = true
|
||||
level.Info(c.logger).Log("msg", "--collector.cpu.info has been set to `true` because you set the following flags, like --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include")
|
||||
}
|
||||
|
||||
var err error
|
||||
if *flagsIncludeFlag != "" {
|
||||
c.cpuFlagsIncludeRegexp, err = regexp.Compile(*flagsIncludeFlag)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if *bugsIncludeFlag != "" {
|
||||
c.cpuBugsIncludeRegexp, err = regexp.Compile(*bugsIncludeFlag)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/.
|
||||
|
@ -117,6 +162,31 @@ func (c *cpuCollector) updateInfo(ch chan<- prometheus.Metric) error {
|
|||
cpu.Microcode,
|
||||
cpu.Stepping,
|
||||
cpu.CacheSize)
|
||||
|
||||
if err := updateFieldInfo(cpu.Flags, c.cpuFlagsIncludeRegexp, c.cpuFlagsInfo, ch); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := updateFieldInfo(cpu.Bugs, c.cpuBugsIncludeRegexp, c.cpuBugsInfo, ch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func updateFieldInfo(valueList []string, filter *regexp.Regexp, desc *prometheus.Desc, ch chan<- prometheus.Metric) error {
|
||||
if filter == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, val := range valueList {
|
||||
if !filter.MatchString(val) {
|
||||
continue
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(desc,
|
||||
prometheus.GaugeValue,
|
||||
1,
|
||||
val,
|
||||
)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -184,12 +184,24 @@ node_cooling_device_cur_state{name="0",type="Processor"} 0
|
|||
# HELP node_cooling_device_max_state Maximum throttle state of the cooling device
|
||||
# TYPE node_cooling_device_max_state gauge
|
||||
node_cooling_device_max_state{name="0",type="Processor"} 3
|
||||
# HELP node_cpu_bug_info The `bugs` field of CPU information from /proc/cpuinfo.
|
||||
# TYPE node_cpu_bug_info gauge
|
||||
node_cpu_bug_info{bug="cpu_meltdown"} 1
|
||||
node_cpu_bug_info{bug="mds"} 1
|
||||
node_cpu_bug_info{bug="spectre_v1"} 1
|
||||
node_cpu_bug_info{bug="spectre_v2"} 1
|
||||
# HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled.
|
||||
# TYPE node_cpu_core_throttles_total counter
|
||||
node_cpu_core_throttles_total{core="0",package="0"} 5
|
||||
node_cpu_core_throttles_total{core="0",package="1"} 0
|
||||
node_cpu_core_throttles_total{core="1",package="0"} 0
|
||||
node_cpu_core_throttles_total{core="1",package="1"} 9
|
||||
# HELP node_cpu_flag_info The `flags` field of CPU information from /proc/cpuinfo.
|
||||
# TYPE node_cpu_flag_info gauge
|
||||
node_cpu_flag_info{flag="aes"} 1
|
||||
node_cpu_flag_info{flag="avx"} 1
|
||||
node_cpu_flag_info{flag="avx2"} 1
|
||||
node_cpu_flag_info{flag="constant_tsc"} 1
|
||||
# HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode.
|
||||
# TYPE node_cpu_guest_seconds_total counter
|
||||
node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01
|
||||
|
|
|
@ -232,12 +232,24 @@ node_cooling_device_cur_state{name="0",type="Processor"} 0
|
|||
# HELP node_cooling_device_max_state Maximum throttle state of the cooling device
|
||||
# TYPE node_cooling_device_max_state gauge
|
||||
node_cooling_device_max_state{name="0",type="Processor"} 3
|
||||
# HELP node_cpu_bug_info The `bugs` field of CPU information from /proc/cpuinfo.
|
||||
# TYPE node_cpu_bug_info gauge
|
||||
node_cpu_bug_info{bug="cpu_meltdown"} 1
|
||||
node_cpu_bug_info{bug="mds"} 1
|
||||
node_cpu_bug_info{bug="spectre_v1"} 1
|
||||
node_cpu_bug_info{bug="spectre_v2"} 1
|
||||
# HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled.
|
||||
# TYPE node_cpu_core_throttles_total counter
|
||||
node_cpu_core_throttles_total{core="0",package="0"} 5
|
||||
node_cpu_core_throttles_total{core="0",package="1"} 0
|
||||
node_cpu_core_throttles_total{core="1",package="0"} 0
|
||||
node_cpu_core_throttles_total{core="1",package="1"} 9
|
||||
# HELP node_cpu_flag_info The `flags` field of CPU information from /proc/cpuinfo.
|
||||
# TYPE node_cpu_flag_info gauge
|
||||
node_cpu_flag_info{flag="aes"} 1
|
||||
node_cpu_flag_info{flag="avx"} 1
|
||||
node_cpu_flag_info{flag="avx2"} 1
|
||||
node_cpu_flag_info{flag="constant_tsc"} 1
|
||||
# HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode.
|
||||
# TYPE node_cpu_guest_seconds_total counter
|
||||
node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01
|
||||
|
|
|
@ -107,6 +107,8 @@ fi
|
|||
--collector.qdisc.fixtures="collector/fixtures/qdisc/" \
|
||||
--collector.netclass.ignored-devices="(bond0|dmz|int)" \
|
||||
--collector.cpu.info \
|
||||
--collector.cpu.info.flags-include="^(aes|avx.?|constant_tsc)$" \
|
||||
--collector.cpu.info.bugs-include="^(cpu_meltdown|spectre_.*|mds)$" \
|
||||
--web.listen-address "127.0.0.1:${port}" \
|
||||
--log.level="debug" > "${tmpdir}/node_exporter.log" 2>&1 &
|
||||
|
||||
|
|
Loading…
Reference in a new issue