Unify CPU collector conventions (#806)

* Unify CPU collector conventions

Add a common CPU metric description.
* All collectors use the same `nodeCpuSecondsDesc`.
* All collectors drop the `cpu` prefix for `cpu` label values.

* Fix subsystem string in cpu_freebsd.

* Fix Linux CPU freq label names.
This commit is contained in:
Ben Kochie 2018-02-01 18:42:20 +01:00 committed by GitHub
parent e3c76b1f0c
commit 14d60958d6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 129 additions and 117 deletions

32
collector/cpu_common.go Normal file
View file

@ -0,0 +1,32 @@
// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build !nocpu

package collector
import (
"github.com/prometheus/client_golang/prometheus"
)
// cpuCollectorSubsystem is the subsystem component used when building the
// fully-qualified names of CPU metrics via prometheus.BuildFQName.
const cpuCollectorSubsystem = "cpu"
// nodeCpuSecondsDesc is the metric descriptor shared by the per-platform CPU
// collectors for the "seconds_total" metric in the cpu subsystem. It carries
// two variable labels, "cpu" and "mode", and no constant labels.
var nodeCpuSecondsDesc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "seconds_total"),
	"Seconds the cpus spent in each mode.",
	[]string{"cpu", "mode"},
	nil, // no constant labels
)

View file

@ -58,11 +58,7 @@ func init() {
// NewCPUCollector returns a new Collector exposing CPU stats.
func NewCPUCollector() (Collector, error) {
return &statCollector{
cpu: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "cpu"),
"Seconds the cpus spent in each mode.",
[]string{"cpu", "mode"}, nil,
),
cpu: nodeCpuSecondsDesc,
}, nil
}
@ -111,7 +107,7 @@ func (c *statCollector) Update(ch chan<- prometheus.Metric) error {
"nice": C.CPU_STATE_NICE,
"idle": C.CPU_STATE_IDLE,
} {
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, float64(cpuTicks[v])/ClocksPerSec, "cpu"+strconv.Itoa(i), k)
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, float64(cpuTicks[v])/ClocksPerSec, strconv.Itoa(i), k)
}
}
return nil

View file

@ -94,11 +94,7 @@ func init() {
// NewStatCollector returns a new Collector exposing CPU stats.
func NewStatCollector() (Collector, error) {
return &statCollector{
cpu: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "cpu"),
"Seconds the cpus spent in each mode.",
[]string{"cpu", "mode"}, nil,
),
cpu: nodeCpuSecondsDesc,
}, nil
}
@ -143,7 +139,7 @@ func (c *statCollector) Update(ch chan<- prometheus.Metric) error {
// Export order: user nice sys intr idle
cpuFields := []string{"user", "nice", "sys", "interrupt", "idle"}
for i, value := range cpuTimes {
cpux := fmt.Sprintf("cpu%d", i/fieldsCount)
cpux := fmt.Sprintf("%d", i/fieldsCount)
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, value, cpux, cpuFields[i%fieldsCount])
}

View file

@ -92,13 +92,9 @@ func init() {
// NewStatCollector returns a new Collector exposing CPU stats.
func NewStatCollector() (Collector, error) {
return &statCollector{
cpu: typedDesc{prometheus.NewDesc(
prometheus.BuildFQName(namespace, "cpu", "seconds_total"),
"Seconds the CPU spent in each mode.",
[]string{"cpu", "mode"}, nil,
), prometheus.CounterValue},
cpu: typedDesc{nodeCpuSecondsDesc, prometheus.CounterValue},
temp: typedDesc{prometheus.NewDesc(
prometheus.BuildFQName(namespace, "cpu", "temperature_celsius"),
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "temperature_celsius"),
"CPU temperature",
[]string{"cpu"}, nil,
), prometheus.GaugeValue},

View file

@ -28,10 +28,6 @@ import (
"github.com/prometheus/procfs"
)
const (
cpuCollectorSubsystem = "cpu"
)
var (
digitRegexp = regexp.MustCompile("[0-9]+")
)
@ -53,11 +49,7 @@ func init() {
// NewCPUCollector returns a new Collector exposing kernel/system statistics.
func NewCPUCollector() (Collector, error) {
return &cpuCollector{
cpu: prometheus.NewDesc(
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "seconds_total"),
"Seconds the cpus spent in each mode.",
[]string{"cpu", "mode"}, nil,
),
cpu: nodeCpuSecondsDesc,
cpuGuest: prometheus.NewDesc(
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"),
"Seconds the cpus spent in guests (VMs) for each mode.",
@ -114,7 +106,8 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
// cpu loop
for _, cpu := range cpus {
_, cpuname := filepath.Split(cpu)
_, cpuName := filepath.Split(cpu)
cpuNum := strings.TrimPrefix(cpuName, "cpu")
if _, err := os.Stat(filepath.Join(cpu, "cpufreq")); os.IsNotExist(err) {
log.Debugf("CPU %v is missing cpufreq", cpu)
@ -124,17 +117,17 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq", "scaling_cur_freq")); err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.cpuFreq, prometheus.GaugeValue, float64(value)*1000.0, cpuname)
ch <- prometheus.MustNewConstMetric(c.cpuFreq, prometheus.GaugeValue, float64(value)*1000.0, cpuNum)
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq", "scaling_min_freq")); err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.cpuFreqMin, prometheus.GaugeValue, float64(value)*1000.0, cpuname)
ch <- prometheus.MustNewConstMetric(c.cpuFreqMin, prometheus.GaugeValue, float64(value)*1000.0, cpuNum)
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq", "scaling_max_freq")); err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.cpuFreqMax, prometheus.GaugeValue, float64(value)*1000.0, cpuname)
ch <- prometheus.MustNewConstMetric(c.cpuFreqMax, prometheus.GaugeValue, float64(value)*1000.0, cpuNum)
}
if _, err := os.Stat(filepath.Join(cpu, "thermal_throttle")); os.IsNotExist(err) {
@ -144,7 +137,7 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname)
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuNum)
}
nodes, err := filepath.Glob(sysFilePath("bus/node/devices/node[0-9]*"))
@ -201,16 +194,15 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error {
}
for cpuID, cpuStat := range stats.CPU {
cpuName := fmt.Sprintf("cpu%d", cpuID)
cpuNum := fmt.Sprintf("%d", cpuID)
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuName, "user")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuName, "nice")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuName, "system")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuName, "idle")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuName, "iowait")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuName, "irq")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuName, "softirq")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuName, "steal")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuNum, "idle")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuNum, "iowait")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuNum, "irq")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, "softirq")
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal")
// Guest CPU is also accounted for in cpuStat.User and cpuStat.Nice, expose these as separate metrics.
ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user")

View file

@ -201,24 +201,24 @@ node_buddyinfo_count{node="0",size="9",zone="Normal"} 0
node_context_switches_total 3.8014093e+07
# HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled.
# TYPE node_cpu_core_throttles_total counter
node_cpu_core_throttles_total{cpu="cpu0"} 5
node_cpu_core_throttles_total{cpu="cpu1"} 0
node_cpu_core_throttles_total{cpu="cpu2"} 40
node_cpu_core_throttles_total{cpu="0"} 5
node_cpu_core_throttles_total{cpu="1"} 0
node_cpu_core_throttles_total{cpu="2"} 40
# HELP node_cpu_frequency_hertz Current cpu thread frequency in hertz.
# TYPE node_cpu_frequency_hertz gauge
node_cpu_frequency_hertz{cpu="cpu0"} 1.699981e+09
node_cpu_frequency_hertz{cpu="cpu1"} 1.699981e+09
node_cpu_frequency_hertz{cpu="cpu3"} 8e+06
node_cpu_frequency_hertz{cpu="0"} 1.699981e+09
node_cpu_frequency_hertz{cpu="1"} 1.699981e+09
node_cpu_frequency_hertz{cpu="3"} 8e+06
# HELP node_cpu_frequency_max_hertz Maximum cpu thread frequency in hertz.
# TYPE node_cpu_frequency_max_hertz gauge
node_cpu_frequency_max_hertz{cpu="cpu0"} 3.7e+09
node_cpu_frequency_max_hertz{cpu="cpu1"} 3.7e+09
node_cpu_frequency_max_hertz{cpu="cpu3"} 4.2e+09
node_cpu_frequency_max_hertz{cpu="0"} 3.7e+09
node_cpu_frequency_max_hertz{cpu="1"} 3.7e+09
node_cpu_frequency_max_hertz{cpu="3"} 4.2e+09
# HELP node_cpu_frequency_min_hertz Minimum cpu thread frequency in hertz.
# TYPE node_cpu_frequency_min_hertz gauge
node_cpu_frequency_min_hertz{cpu="cpu0"} 8e+08
node_cpu_frequency_min_hertz{cpu="cpu1"} 8e+08
node_cpu_frequency_min_hertz{cpu="cpu3"} 1e+06
node_cpu_frequency_min_hertz{cpu="0"} 8e+08
node_cpu_frequency_min_hertz{cpu="1"} 8e+08
node_cpu_frequency_min_hertz{cpu="3"} 1e+06
# HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode.
# TYPE node_cpu_guest_seconds_total counter
node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01
@ -242,70 +242,70 @@ node_cpu_guest_seconds_total{cpu="7",mode="user"} 0.09
node_cpu_package_throttles_total{node="0"} 30
# HELP node_cpu_seconds_total Seconds the cpus spent in each mode.
# TYPE node_cpu_seconds_total counter
node_cpu_seconds_total{cpu="cpu0",mode="idle"} 10870.69
node_cpu_seconds_total{cpu="cpu0",mode="iowait"} 2.2
node_cpu_seconds_total{cpu="cpu0",mode="irq"} 0.01
node_cpu_seconds_total{cpu="cpu0",mode="nice"} 0.19
node_cpu_seconds_total{cpu="cpu0",mode="softirq"} 34.1
node_cpu_seconds_total{cpu="cpu0",mode="steal"} 0
node_cpu_seconds_total{cpu="cpu0",mode="system"} 210.45
node_cpu_seconds_total{cpu="cpu0",mode="user"} 444.9
node_cpu_seconds_total{cpu="cpu1",mode="idle"} 11107.87
node_cpu_seconds_total{cpu="cpu1",mode="iowait"} 5.91
node_cpu_seconds_total{cpu="cpu1",mode="irq"} 0
node_cpu_seconds_total{cpu="cpu1",mode="nice"} 0.23
node_cpu_seconds_total{cpu="cpu1",mode="softirq"} 0.46
node_cpu_seconds_total{cpu="cpu1",mode="steal"} 0
node_cpu_seconds_total{cpu="cpu1",mode="system"} 164.74
node_cpu_seconds_total{cpu="cpu1",mode="user"} 478.69
node_cpu_seconds_total{cpu="cpu2",mode="idle"} 11123.21
node_cpu_seconds_total{cpu="cpu2",mode="iowait"} 4.41
node_cpu_seconds_total{cpu="cpu2",mode="irq"} 0
node_cpu_seconds_total{cpu="cpu2",mode="nice"} 0.36
node_cpu_seconds_total{cpu="cpu2",mode="softirq"} 3.26
node_cpu_seconds_total{cpu="cpu2",mode="steal"} 0
node_cpu_seconds_total{cpu="cpu2",mode="system"} 159.16
node_cpu_seconds_total{cpu="cpu2",mode="user"} 465.04
node_cpu_seconds_total{cpu="cpu3",mode="idle"} 11132.3
node_cpu_seconds_total{cpu="cpu3",mode="iowait"} 5.33
node_cpu_seconds_total{cpu="cpu3",mode="irq"} 0
node_cpu_seconds_total{cpu="cpu3",mode="nice"} 1.02
node_cpu_seconds_total{cpu="cpu3",mode="softirq"} 0.6
node_cpu_seconds_total{cpu="cpu3",mode="steal"} 0
node_cpu_seconds_total{cpu="cpu3",mode="system"} 156.83
node_cpu_seconds_total{cpu="cpu3",mode="user"} 470.54
node_cpu_seconds_total{cpu="cpu4",mode="idle"} 11403.21
node_cpu_seconds_total{cpu="cpu4",mode="iowait"} 2.17
node_cpu_seconds_total{cpu="cpu4",mode="irq"} 0
node_cpu_seconds_total{cpu="cpu4",mode="nice"} 0.25
node_cpu_seconds_total{cpu="cpu4",mode="softirq"} 0.08
node_cpu_seconds_total{cpu="cpu4",mode="steal"} 0
node_cpu_seconds_total{cpu="cpu4",mode="system"} 107.76
node_cpu_seconds_total{cpu="cpu4",mode="user"} 284.13
node_cpu_seconds_total{cpu="cpu5",mode="idle"} 11362.7
node_cpu_seconds_total{cpu="cpu5",mode="iowait"} 6.72
node_cpu_seconds_total{cpu="cpu5",mode="irq"} 0
node_cpu_seconds_total{cpu="cpu5",mode="nice"} 1.01
node_cpu_seconds_total{cpu="cpu5",mode="softirq"} 0.3
node_cpu_seconds_total{cpu="cpu5",mode="steal"} 0
node_cpu_seconds_total{cpu="cpu5",mode="system"} 115.86
node_cpu_seconds_total{cpu="cpu5",mode="user"} 292.71
node_cpu_seconds_total{cpu="cpu6",mode="idle"} 11397.21
node_cpu_seconds_total{cpu="cpu6",mode="iowait"} 3.19
node_cpu_seconds_total{cpu="cpu6",mode="irq"} 0
node_cpu_seconds_total{cpu="cpu6",mode="nice"} 0.36
node_cpu_seconds_total{cpu="cpu6",mode="softirq"} 0.29
node_cpu_seconds_total{cpu="cpu6",mode="steal"} 0
node_cpu_seconds_total{cpu="cpu6",mode="system"} 102.76
node_cpu_seconds_total{cpu="cpu6",mode="user"} 291.52
node_cpu_seconds_total{cpu="cpu7",mode="idle"} 11392.82
node_cpu_seconds_total{cpu="cpu7",mode="iowait"} 5.55
node_cpu_seconds_total{cpu="cpu7",mode="irq"} 0
node_cpu_seconds_total{cpu="cpu7",mode="nice"} 2.68
node_cpu_seconds_total{cpu="cpu7",mode="softirq"} 0.31
node_cpu_seconds_total{cpu="cpu7",mode="steal"} 0
node_cpu_seconds_total{cpu="cpu7",mode="system"} 101.64
node_cpu_seconds_total{cpu="cpu7",mode="user"} 290.98
node_cpu_seconds_total{cpu="0",mode="idle"} 10870.69
node_cpu_seconds_total{cpu="0",mode="iowait"} 2.2
node_cpu_seconds_total{cpu="0",mode="irq"} 0.01
node_cpu_seconds_total{cpu="0",mode="nice"} 0.19
node_cpu_seconds_total{cpu="0",mode="softirq"} 34.1
node_cpu_seconds_total{cpu="0",mode="steal"} 0
node_cpu_seconds_total{cpu="0",mode="system"} 210.45
node_cpu_seconds_total{cpu="0",mode="user"} 444.9
node_cpu_seconds_total{cpu="1",mode="idle"} 11107.87
node_cpu_seconds_total{cpu="1",mode="iowait"} 5.91
node_cpu_seconds_total{cpu="1",mode="irq"} 0
node_cpu_seconds_total{cpu="1",mode="nice"} 0.23
node_cpu_seconds_total{cpu="1",mode="softirq"} 0.46
node_cpu_seconds_total{cpu="1",mode="steal"} 0
node_cpu_seconds_total{cpu="1",mode="system"} 164.74
node_cpu_seconds_total{cpu="1",mode="user"} 478.69
node_cpu_seconds_total{cpu="2",mode="idle"} 11123.21
node_cpu_seconds_total{cpu="2",mode="iowait"} 4.41
node_cpu_seconds_total{cpu="2",mode="irq"} 0
node_cpu_seconds_total{cpu="2",mode="nice"} 0.36
node_cpu_seconds_total{cpu="2",mode="softirq"} 3.26
node_cpu_seconds_total{cpu="2",mode="steal"} 0
node_cpu_seconds_total{cpu="2",mode="system"} 159.16
node_cpu_seconds_total{cpu="2",mode="user"} 465.04
node_cpu_seconds_total{cpu="3",mode="idle"} 11132.3
node_cpu_seconds_total{cpu="3",mode="iowait"} 5.33
node_cpu_seconds_total{cpu="3",mode="irq"} 0
node_cpu_seconds_total{cpu="3",mode="nice"} 1.02
node_cpu_seconds_total{cpu="3",mode="softirq"} 0.6
node_cpu_seconds_total{cpu="3",mode="steal"} 0
node_cpu_seconds_total{cpu="3",mode="system"} 156.83
node_cpu_seconds_total{cpu="3",mode="user"} 470.54
node_cpu_seconds_total{cpu="4",mode="idle"} 11403.21
node_cpu_seconds_total{cpu="4",mode="iowait"} 2.17
node_cpu_seconds_total{cpu="4",mode="irq"} 0
node_cpu_seconds_total{cpu="4",mode="nice"} 0.25
node_cpu_seconds_total{cpu="4",mode="softirq"} 0.08
node_cpu_seconds_total{cpu="4",mode="steal"} 0
node_cpu_seconds_total{cpu="4",mode="system"} 107.76
node_cpu_seconds_total{cpu="4",mode="user"} 284.13
node_cpu_seconds_total{cpu="5",mode="idle"} 11362.7
node_cpu_seconds_total{cpu="5",mode="iowait"} 6.72
node_cpu_seconds_total{cpu="5",mode="irq"} 0
node_cpu_seconds_total{cpu="5",mode="nice"} 1.01
node_cpu_seconds_total{cpu="5",mode="softirq"} 0.3
node_cpu_seconds_total{cpu="5",mode="steal"} 0
node_cpu_seconds_total{cpu="5",mode="system"} 115.86
node_cpu_seconds_total{cpu="5",mode="user"} 292.71
node_cpu_seconds_total{cpu="6",mode="idle"} 11397.21
node_cpu_seconds_total{cpu="6",mode="iowait"} 3.19
node_cpu_seconds_total{cpu="6",mode="irq"} 0
node_cpu_seconds_total{cpu="6",mode="nice"} 0.36
node_cpu_seconds_total{cpu="6",mode="softirq"} 0.29
node_cpu_seconds_total{cpu="6",mode="steal"} 0
node_cpu_seconds_total{cpu="6",mode="system"} 102.76
node_cpu_seconds_total{cpu="6",mode="user"} 291.52
node_cpu_seconds_total{cpu="7",mode="idle"} 11392.82
node_cpu_seconds_total{cpu="7",mode="iowait"} 5.55
node_cpu_seconds_total{cpu="7",mode="irq"} 0
node_cpu_seconds_total{cpu="7",mode="nice"} 2.68
node_cpu_seconds_total{cpu="7",mode="softirq"} 0.31
node_cpu_seconds_total{cpu="7",mode="steal"} 0
node_cpu_seconds_total{cpu="7",mode="system"} 101.64
node_cpu_seconds_total{cpu="7",mode="user"} 290.98
# HELP node_disk_io_now The number of I/Os currently in progress.
# TYPE node_disk_io_now gauge
node_disk_io_now{device="dm-0"} 0