From cc847f2f44172881d79f40a99f1ad6d90f3e528e Mon Sep 17 00:00:00 2001 From: Paul Gier Date: Tue, 19 Feb 2019 10:22:54 -0600 Subject: [PATCH] collector/cpu: split cpu freq metrics into separate collector (#1253) The cpu frequency information is not always needed and/or available. This change allows the cpu frequency metrics to be enabled/disabled separately from the other cpu metrics, and also prevents a frequency metric failure (such as a parse error) from failing the main cpu collector. Fixes #1241 Signed-off-by: Paul Gier --- CHANGELOG.md | 1 + README.md | 1 + collector/cpu_linux.go | 107 ---------------- collector/cpu_solaris.go | 70 +---------- collector/cpufreq_linux.go | 139 +++++++++++++++++++++ collector/cpufreq_solaris.go | 95 ++++++++++++++ collector/fixtures/e2e-64k-page-output.txt | 1 + collector/fixtures/e2e-output.txt | 1 + end-to-end-test.sh | 1 + 9 files changed, 241 insertions(+), 175 deletions(-) create mode 100644 collector/cpufreq_linux.go create mode 100644 collector/cpufreq_solaris.go diff --git a/CHANGELOG.md b/CHANGELOG.md index cf17e7df..154b4b75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ * [CHANGE] Add separate cpufreq and scaling metrics #1248 * [CHANGE] Several systemd metrics have been turned off by default to improve performance #1254 * [CHANGE] Expand systemd collector blacklist #1255 +* [CHANGE] Split cpufreq metrics into a separate collector #1253 * [ENHANCEMENT] Add Infiniband counters #1120 * [ENHANCEMENT] Move network_up labels into new metric network_info #1236 * [FEATURE] Add a flag to disable exporter metrics #1148 diff --git a/README.md b/README.md index 3196ab3b..8fbb8f1b 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ bonding | Exposes the number of configured and active slaves of Linux bonding in boottime | Exposes system boot time derived from the `kern.boottime` sysctl. 
| Darwin, Dragonfly, FreeBSD, NetBSD, OpenBSD, Solaris conntrack | Shows conntrack statistics (does nothing if no `/proc/sys/net/netfilter/` present). | Linux cpu | Exposes CPU statistics | Darwin, Dragonfly, FreeBSD, Linux, Solaris +cpufreq | Exposes CPU frequency statistics | Linux, Solaris diskstats | Exposes disk I/O statistics. | Darwin, Linux, OpenBSD edac | Exposes error detection and correction statistics. | Linux entropy | Exposes available entropy. | Linux diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index 50d8a3b1..be63590d 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -23,18 +23,11 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/log" "github.com/prometheus/procfs" - "github.com/prometheus/procfs/sysfs" ) type cpuCollector struct { cpu *prometheus.Desc cpuGuest *prometheus.Desc - cpuFreq *prometheus.Desc - cpuFreqMin *prometheus.Desc - cpuFreqMax *prometheus.Desc - scalingFreq *prometheus.Desc - scalingFreqMin *prometheus.Desc - scalingFreqMax *prometheus.Desc cpuCoreThrottle *prometheus.Desc cpuPackageThrottle *prometheus.Desc } @@ -52,36 +45,6 @@ func NewCPUCollector() (Collector, error) { "Seconds the cpus spent in guests (VMs) for each mode.", []string{"cpu", "mode"}, nil, ), - cpuFreq: prometheus.NewDesc( - prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_hertz"), - "Current cpu thread frequency in hertz.", - []string{"cpu"}, nil, - ), - cpuFreqMin: prometheus.NewDesc( - prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_min_hertz"), - "Minimum cpu thread frequency in hertz.", - []string{"cpu"}, nil, - ), - cpuFreqMax: prometheus.NewDesc( - prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_max_hertz"), - "Maximum cpu thread frequency in hertz.", - []string{"cpu"}, nil, - ), - scalingFreq: prometheus.NewDesc( - prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "scaling_frequency_hertz"), - "Current scaled cpu 
thread frequency in hertz.", - []string{"cpu"}, nil, - ), - scalingFreqMin: prometheus.NewDesc( - prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "scaling_frequency_min_hrts"), - "Minimum scaled cpu thread frequency in hertz.", - []string{"cpu"}, nil, - ), - scalingFreqMax: prometheus.NewDesc( - prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "scaling_frequency_max_hrts"), - "Maximum scaled cpu thread frequency in hertz.", - []string{"cpu"}, nil, - ), cpuCoreThrottle: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "core_throttles_total"), "Number of times this cpu core has been throttled.", @@ -100,82 +63,12 @@ func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error { if err := c.updateStat(ch); err != nil { return err } - if err := c.updateCPUfreq(ch); err != nil { - return err - } if err := c.updateThermalThrottle(ch); err != nil { return err } return nil } -// updateCPUfreq reads /sys/devices/system/cpu/cpu* and expose cpu frequency statistics. -func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error { - fs, err := sysfs.NewFS(*sysPath) - if err != nil { - return fmt.Errorf("failed to open sysfs: %v", err) - } - - cpuFreqs, err := fs.NewSystemCpufreq() - if err != nil { - return err - } - - // sysfs cpufreq values are kHz, thus multiply by 1000 to export base units (hz). 
- // See https://www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt - for _, stats := range cpuFreqs { - if stats.CpuinfoCurrentFrequency != nil { - ch <- prometheus.MustNewConstMetric( - c.cpuFreq, - prometheus.GaugeValue, - float64(*stats.CpuinfoCurrentFrequency)*1000.0, - stats.Name, - ) - } - if stats.CpuinfoMinimumFrequency != nil { - ch <- prometheus.MustNewConstMetric( - c.cpuFreqMin, - prometheus.GaugeValue, - float64(*stats.CpuinfoMinimumFrequency)*1000.0, - stats.Name, - ) - } - if stats.CpuinfoMaximumFrequency != nil { - ch <- prometheus.MustNewConstMetric( - c.cpuFreqMax, - prometheus.GaugeValue, - float64(*stats.CpuinfoMaximumFrequency)*1000.0, - stats.Name, - ) - } - if stats.ScalingCurrentFrequency != nil { - ch <- prometheus.MustNewConstMetric( - c.scalingFreq, - prometheus.GaugeValue, - float64(*stats.ScalingCurrentFrequency)*1000.0, - stats.Name, - ) - } - if stats.ScalingMinimumFrequency != nil { - ch <- prometheus.MustNewConstMetric( - c.scalingFreqMin, - prometheus.GaugeValue, - float64(*stats.ScalingMinimumFrequency)*1000.0, - stats.Name, - ) - } - if stats.ScalingMaximumFrequency != nil { - ch <- prometheus.MustNewConstMetric( - c.scalingFreqMax, - prometheus.GaugeValue, - float64(*stats.ScalingMaximumFrequency)*1000.0, - stats.Name, - ) - } - } - return nil -} - // updateThermalThrottle reads /sys/devices/system/cpu/cpu* and expose thermal throttle statistics. 
func (c *cpuCollector) updateThermalThrottle(ch chan<- prometheus.Metric) error { cpus, err := filepath.Glob(sysFilePath("devices/system/cpu/cpu[0-9]*")) diff --git a/collector/cpu_solaris.go b/collector/cpu_solaris.go index f60aeb3f..f772765e 100644 --- a/collector/cpu_solaris.go +++ b/collector/cpu_solaris.go @@ -17,20 +17,17 @@ package collector import ( - "fmt" "strconv" "github.com/prometheus/client_golang/prometheus" - "github.com/siebenmann/go-kstat" + kstat "github.com/siebenmann/go-kstat" ) // #include <unistd.h> import "C" type cpuCollector struct { - cpu typedDesc - cpuFreq *prometheus.Desc - cpuFreqMax *prometheus.Desc + cpu typedDesc } func init() { @@ -40,30 +37,10 @@ func init() { func NewCpuCollector() (Collector, error) { return &cpuCollector{ cpu: typedDesc{nodeCPUSecondsDesc, prometheus.CounterValue}, - cpuFreq: prometheus.NewDesc( - prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_hertz"), - "Current cpu thread frequency in hertz.", - []string{"cpu"}, nil, - ), - cpuFreqMax: prometheus.NewDesc( - prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_max_hertz"), - "Maximum cpu thread frequency in hertz.", - []string{"cpu"}, nil, - ), }, nil } func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error { - if err := c.updateCPUstats(ch); err != nil { - return err - } - if err := c.updateCPUfreq(ch); err != nil { - return err - } - return nil -} - -func (c *cpuCollector) updateCPUstats(ch chan<- prometheus.Metric) error { ncpus := C.sysconf(C._SC_NPROCESSORS_ONLN) tok, err := kstat.Open() @@ -95,46 +72,3 @@ func (c *cpuCollector) updateCPUstats(ch chan<- prometheus.Metric) error { } return nil } - -func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error { - ncpus := C.sysconf(C._SC_NPROCESSORS_ONLN) - - tok, err := kstat.Open() - if err != nil { - return err - } - - defer tok.Close() - - for cpu := 0; cpu < int(ncpus); cpu++ { - ksCPUInfo, err := tok.Lookup("cpu_info", cpu, fmt.Sprintf("cpu_info%d", cpu)) 
- if err != nil { - return err - } - cpuFreqV, err := ksCPUInfo.GetNamed("current_clock_Hz") - if err != nil { - return err - } - - cpuFreqMaxV, err := ksCPUInfo.GetNamed("clock_MHz") - if err != nil { - return err - } - - lcpu := strconv.Itoa(cpu) - ch <- prometheus.MustNewConstMetric( - c.cpuFreq, - prometheus.GaugeValue, - float64(cpuFreqV.UintVal), - lcpu, - ) - // Multiply by 1e+6 to convert MHz to Hz. - ch <- prometheus.MustNewConstMetric( - c.cpuFreqMax, - prometheus.GaugeValue, - float64(cpuFreqMaxV.IntVal)*1e+6, - lcpu, - ) - } - return nil -} diff --git a/collector/cpufreq_linux.go b/collector/cpufreq_linux.go new file mode 100644 index 00000000..11f6f38e --- /dev/null +++ b/collector/cpufreq_linux.go @@ -0,0 +1,139 @@ +// Copyright 2019 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !nocpu + +package collector + +import ( + "fmt" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/procfs/sysfs" +) + +type cpuFreqCollector struct { + cpuFreq *prometheus.Desc + cpuFreqMin *prometheus.Desc + cpuFreqMax *prometheus.Desc + scalingFreq *prometheus.Desc + scalingFreqMin *prometheus.Desc + scalingFreqMax *prometheus.Desc +} + +func init() { + registerCollector("cpufreq", defaultEnabled, NewCPUFreqCollector) +} + +// NewCPUFreqCollector returns a new Collector exposing CPU frequency statistics read from sysfs. 
+func NewCPUFreqCollector() (Collector, error) { + return &cpuFreqCollector{ + cpuFreq: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_hertz"), + "Current cpu thread frequency in hertz.", + []string{"cpu"}, nil, + ), + cpuFreqMin: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_min_hertz"), + "Minimum cpu thread frequency in hertz.", + []string{"cpu"}, nil, + ), + cpuFreqMax: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_max_hertz"), + "Maximum cpu thread frequency in hertz.", + []string{"cpu"}, nil, + ), + scalingFreq: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "scaling_frequency_hertz"), + "Current scaled cpu thread frequency in hertz.", + []string{"cpu"}, nil, + ), + scalingFreqMin: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "scaling_frequency_min_hrts"), + "Minimum scaled cpu thread frequency in hertz.", + []string{"cpu"}, nil, + ), + scalingFreqMax: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "scaling_frequency_max_hrts"), + "Maximum scaled cpu thread frequency in hertz.", + []string{"cpu"}, nil, + ), + }, nil +} + +// Update implements Collector and exposes cpu frequency metrics read from /sys/devices/system/cpu/cpu*. +func (c *cpuFreqCollector) Update(ch chan<- prometheus.Metric) error { + fs, err := sysfs.NewFS(*sysPath) + if err != nil { + return fmt.Errorf("failed to open sysfs: %v", err) + } + + cpuFreqs, err := fs.NewSystemCpufreq() + if err != nil { + return err + } + + // sysfs cpufreq values are kHz, thus multiply by 1000 to export base units (Hz). 
+ // See https://www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt + for _, stats := range cpuFreqs { + if stats.CpuinfoCurrentFrequency != nil { + ch <- prometheus.MustNewConstMetric( + c.cpuFreq, + prometheus.GaugeValue, + float64(*stats.CpuinfoCurrentFrequency)*1000.0, + stats.Name, + ) + } + if stats.CpuinfoMinimumFrequency != nil { + ch <- prometheus.MustNewConstMetric( + c.cpuFreqMin, + prometheus.GaugeValue, + float64(*stats.CpuinfoMinimumFrequency)*1000.0, + stats.Name, + ) + } + if stats.CpuinfoMaximumFrequency != nil { + ch <- prometheus.MustNewConstMetric( + c.cpuFreqMax, + prometheus.GaugeValue, + float64(*stats.CpuinfoMaximumFrequency)*1000.0, + stats.Name, + ) + } + if stats.ScalingCurrentFrequency != nil { + ch <- prometheus.MustNewConstMetric( + c.scalingFreq, + prometheus.GaugeValue, + float64(*stats.ScalingCurrentFrequency)*1000.0, + stats.Name, + ) + } + if stats.ScalingMinimumFrequency != nil { + ch <- prometheus.MustNewConstMetric( + c.scalingFreqMin, + prometheus.GaugeValue, + float64(*stats.ScalingMinimumFrequency)*1000.0, + stats.Name, + ) + } + if stats.ScalingMaximumFrequency != nil { + ch <- prometheus.MustNewConstMetric( + c.scalingFreqMax, + prometheus.GaugeValue, + float64(*stats.ScalingMaximumFrequency)*1000.0, + stats.Name, + ) + } + } + return nil +} diff --git a/collector/cpufreq_solaris.go b/collector/cpufreq_solaris.go new file mode 100644 index 00000000..77ac3ab8 --- /dev/null +++ b/collector/cpufreq_solaris.go @@ -0,0 +1,98 @@ +// Copyright 2019 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// +build solaris +// +build !nocpu + +package collector + +import ( + "fmt" + "strconv" + + "github.com/prometheus/client_golang/prometheus" + kstat "github.com/siebenmann/go-kstat" +) + +// #include <unistd.h> +import "C" + +type cpuFreqCollector struct { + cpuFreq *prometheus.Desc + cpuFreqMax *prometheus.Desc +} + +func init() { + registerCollector("cpufreq", defaultEnabled, NewCpuFreqCollector) +} + +// NewCpuFreqCollector returns a new Collector exposing CPU frequency statistics read from kstat. +func NewCpuFreqCollector() (Collector, error) { + return &cpuFreqCollector{ + cpuFreq: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_hertz"), + "Current cpu thread frequency in hertz.", + []string{"cpu"}, nil, + ), + cpuFreqMax: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "frequency_max_hertz"), + "Maximum cpu thread frequency in hertz.", + []string{"cpu"}, nil, + ), + }, nil +} + +// Update implements Collector and exposes the current and maximum frequency per cpu, +// read from the kstat cpu_info entries. +func (c *cpuFreqCollector) Update(ch chan<- prometheus.Metric) error { + ncpus := C.sysconf(C._SC_NPROCESSORS_ONLN) + + tok, err := kstat.Open() + if err != nil { + return err + } + + defer tok.Close() + + for cpu := 0; cpu < int(ncpus); cpu++ { + ksCPUInfo, err := tok.Lookup("cpu_info", cpu, fmt.Sprintf("cpu_info%d", cpu)) + if err != nil { + return err + } + cpuFreqV, err := ksCPUInfo.GetNamed("current_clock_Hz") + if err != nil { + return err + } + + cpuFreqMaxV, err := ksCPUInfo.GetNamed("clock_MHz") + if err != nil { + return err + } + + lcpu := strconv.Itoa(cpu) + ch <- prometheus.MustNewConstMetric( + c.cpuFreq, + prometheus.GaugeValue, + float64(cpuFreqV.UintVal), + lcpu, + ) + // Multiply by 1e+6 to convert MHz to Hz. 
+ ch <- prometheus.MustNewConstMetric( + c.cpuFreqMax, + prometheus.GaugeValue, + float64(cpuFreqMaxV.IntVal)*1e+6, + lcpu, + ) + } + return nil +} diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index aa96e911..ea597310 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -2340,6 +2340,7 @@ node_scrape_collector_success{collector="bonding"} 1 node_scrape_collector_success{collector="buddyinfo"} 1 node_scrape_collector_success{collector="conntrack"} 1 node_scrape_collector_success{collector="cpu"} 1 +node_scrape_collector_success{collector="cpufreq"} 1 node_scrape_collector_success{collector="diskstats"} 1 node_scrape_collector_success{collector="drbd"} 1 node_scrape_collector_success{collector="edac"} 1 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index d278833c..f4c6d56e 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -2340,6 +2340,7 @@ node_scrape_collector_success{collector="bonding"} 1 node_scrape_collector_success{collector="buddyinfo"} 1 node_scrape_collector_success{collector="conntrack"} 1 node_scrape_collector_success{collector="cpu"} 1 +node_scrape_collector_success{collector="cpufreq"} 1 node_scrape_collector_success{collector="diskstats"} 1 node_scrape_collector_success{collector="drbd"} 1 node_scrape_collector_success{collector="edac"} 1 diff --git a/end-to-end-test.sh b/end-to-end-test.sh index 43f8fbf1..ea24cf51 100755 --- a/end-to-end-test.sh +++ b/end-to-end-test.sh @@ -8,6 +8,7 @@ enabled_collectors=$(cat << COLLECTORS buddyinfo conntrack cpu + cpufreq diskstats drbd edac