Fix Linux cpu errors (#606)

Make the Linux cpu collector log a debug message and skip the affected
metrics, instead of returning an error, when a CPU is missing its
`cpufreq` or `thermal_throttle` directory.
This commit is contained in:
Ben Kochie 2017-06-20 07:51:26 +02:00 committed by GitHub
parent be6291adde
commit 182810056f
7 changed files with 38 additions and 18 deletions

View file

@ -17,9 +17,11 @@ package collector
import ( import (
"fmt" "fmt"
"os"
"path/filepath" "path/filepath"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"github.com/prometheus/procfs" "github.com/prometheus/procfs"
) )
@ -99,6 +101,9 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
for _, cpu := range cpus { for _, cpu := range cpus {
_, cpuname := filepath.Split(cpu) _, cpuname := filepath.Split(cpu)
if _, err := os.Stat(filepath.Join(cpu, "cpufreq")); os.IsNotExist(err) {
log.Debugf("CPU %q is missing cpufreq", cpu)
} else {
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_cur_freq")); err != nil { if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_cur_freq")); err != nil {
return err return err
} }
@ -113,7 +118,11 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
return err return err
} }
ch <- prometheus.MustNewConstMetric(c.cpuFreqMax, prometheus.GaugeValue, float64(value), cpuname) ch <- prometheus.MustNewConstMetric(c.cpuFreqMax, prometheus.GaugeValue, float64(value), cpuname)
}
if _, err := os.Stat(filepath.Join(cpu, "thermal_throttle")); os.IsNotExist(err) {
log.Debugf("CPU %q is missing thermal_throttle", cpu)
} else {
if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle/core_throttle_count")); err != nil { if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle/core_throttle_count")); err != nil {
return err return err
} }
@ -124,6 +133,7 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
} }
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), cpuname) ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), cpuname)
} }
}
return nil return nil
} }

View file

@ -212,22 +212,27 @@ node_cpu{cpu="cpu7",mode="user"} 290.98
# TYPE node_cpu_core_throttles_total counter # TYPE node_cpu_core_throttles_total counter
node_cpu_core_throttles_total{cpu="cpu0"} 5 node_cpu_core_throttles_total{cpu="cpu0"} 5
node_cpu_core_throttles_total{cpu="cpu1"} 0 node_cpu_core_throttles_total{cpu="cpu1"} 0
node_cpu_core_throttles_total{cpu="cpu2"} 40
# HELP node_cpu_frequency_hertz Current cpu thread frequency in hertz. # HELP node_cpu_frequency_hertz Current cpu thread frequency in hertz.
# TYPE node_cpu_frequency_hertz gauge # TYPE node_cpu_frequency_hertz gauge
node_cpu_frequency_hertz{cpu="cpu0"} 1.699981e+06 node_cpu_frequency_hertz{cpu="cpu0"} 1.699981e+06
node_cpu_frequency_hertz{cpu="cpu1"} 1.699981e+06 node_cpu_frequency_hertz{cpu="cpu1"} 1.699981e+06
node_cpu_frequency_hertz{cpu="cpu3"} 8000
# HELP node_cpu_frequency_max_hertz Maximum cpu thread frequency in hertz. # HELP node_cpu_frequency_max_hertz Maximum cpu thread frequency in hertz.
# TYPE node_cpu_frequency_max_hertz gauge # TYPE node_cpu_frequency_max_hertz gauge
node_cpu_frequency_max_hertz{cpu="cpu0"} 3.7e+06 node_cpu_frequency_max_hertz{cpu="cpu0"} 3.7e+06
node_cpu_frequency_max_hertz{cpu="cpu1"} 3.7e+06 node_cpu_frequency_max_hertz{cpu="cpu1"} 3.7e+06
node_cpu_frequency_max_hertz{cpu="cpu3"} 4.2e+06
# HELP node_cpu_frequency_min_hertz Minimum cpu thread frequency in hertz. # HELP node_cpu_frequency_min_hertz Minimum cpu thread frequency in hertz.
# TYPE node_cpu_frequency_min_hertz gauge # TYPE node_cpu_frequency_min_hertz gauge
node_cpu_frequency_min_hertz{cpu="cpu0"} 800000 node_cpu_frequency_min_hertz{cpu="cpu0"} 800000
node_cpu_frequency_min_hertz{cpu="cpu1"} 800000 node_cpu_frequency_min_hertz{cpu="cpu1"} 800000
node_cpu_frequency_min_hertz{cpu="cpu3"} 1000
# HELP node_cpu_package_throttles_total Number of times this cpu package has been throttled. # HELP node_cpu_package_throttles_total Number of times this cpu package has been throttled.
# TYPE node_cpu_package_throttles_total counter # TYPE node_cpu_package_throttles_total counter
node_cpu_package_throttles_total{cpu="cpu0"} 30 node_cpu_package_throttles_total{cpu="cpu0"} 30
node_cpu_package_throttles_total{cpu="cpu1"} 30 node_cpu_package_throttles_total{cpu="cpu1"} 30
node_cpu_package_throttles_total{cpu="cpu2"} 6
# HELP node_disk_bytes_read The total number of bytes read successfully. # HELP node_disk_bytes_read The total number of bytes read successfully.
# TYPE node_disk_bytes_read counter # TYPE node_disk_bytes_read counter
node_disk_bytes_read{device="dm-0"} 5.13708655616e+11 node_disk_bytes_read{device="dm-0"} 5.13708655616e+11

View file

@ -0,0 +1 @@
4200000