diff --git a/README.md b/README.md index 45c38374..3ec32f00 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Name | Description | OS ---------|-------------|---- arp | Exposes ARP statistics from `/proc/net/arp`. | Linux conntrack | Shows conntrack statistics (does nothing if no `/proc/sys/net/netfilter/` present). | Linux -cpu | Exposes CPU statistics | Darwin, Dragonfly, FreeBSD +cpu | Exposes CPU statistics | Darwin, Dragonfly, FreeBSD, Linux diskstats | Exposes disk I/O statistics from `/proc/diskstats`. | Linux edac | Exposes error detection and correction statistics. | Linux entropy | Exposes available entropy. | Linux @@ -38,7 +38,7 @@ meminfo | Exposes memory statistics. | Darwin, Dragonfly, FreeBSD, Linux netdev | Exposes network interface statistics such as bytes transferred. | Darwin, Dragonfly, FreeBSD, Linux, OpenBSD netstat | Exposes network statistics from `/proc/net/netstat`. This is the same information as `netstat -s`. | Linux sockstat | Exposes various statistics from `/proc/net/sockstat`. | Linux -stat | Exposes various statistics from `/proc/stat`. This includes CPU usage, boot time, forks and interrupts. | Linux +stat | Exposes various statistics from `/proc/stat`. This includes boot time, forks and interrupts. | Linux textfile | Exposes statistics read from local disk. The `--collector.textfile.directory` flag must be set. | _any_ time | Exposes the current system time. | _any_ uname | Exposes system information as provided by the uname system call. | Linux diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go new file mode 100644 index 00000000..abe12e72 --- /dev/null +++ b/collector/cpu_linux.go @@ -0,0 +1,157 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !nocpu + +package collector + +import ( + "fmt" + "path/filepath" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/procfs" +) + +const ( + cpuCollectorNamespace = "cpu" +) + +type cpuCollector struct { + cpu *prometheus.Desc + cpuFreq *prometheus.Desc + cpuFreqMin *prometheus.Desc + cpuFreqMax *prometheus.Desc + cpuCoreThrottle *prometheus.Desc + cpuPackageThrottle *prometheus.Desc +} + +func init() { + Factories["cpu"] = NewCPUCollector +} + +// NewCPUCollector returns a new Collector exposing cpu time, frequency and throttle statistics. +func NewCPUCollector() (Collector, error) { + return &cpuCollector{ + cpu: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, "", cpuCollectorNamespace), + "Seconds the cpus spent in each mode.", + []string{"cpu", "mode"}, nil, + ), + cpuFreq: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "frequency_hertz"), + "Current cpu thread frequency in hertz.", + []string{"cpu"}, nil, + ), + cpuFreqMin: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "frequency_min_hertz"), + "Minimum cpu thread frequency in hertz.", + []string{"cpu"}, nil, + ), + cpuFreqMax: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "frequency_max_hertz"), + "Maximum cpu thread frequency in hertz.", + []string{"cpu"}, nil, + ), + cpuCoreThrottle: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "core_throttles_total"), + "Number of times this cpu core has been throttled.", + []string{"cpu"}, nil, 
), + cpuPackageThrottle: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "package_throttles_total"), + "Number of times this cpu package has been throttled.", + []string{"cpu"}, nil, + ), + }, nil +} + +// Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/. +func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error { + if err := c.updateStat(ch); err != nil { + return err + } + if err := c.updateCPUfreq(ch); err != nil { + return err + } + return nil +} + +// updateCPUfreq reads /sys/bus/cpu/devices/cpu* and expose cpu frequency statistics. +func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error { + cpus, err := filepath.Glob(sysFilePath("bus/cpu/devices/cpu[0-9]*")) + if err != nil { + return err + } + + var value uint64 + + for _, cpu := range cpus { + _, cpuname := filepath.Split(cpu) + + if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_cur_freq")); err != nil { + return err + } + ch <- prometheus.MustNewConstMetric(c.cpuFreq, prometheus.GaugeValue, float64(value), cpuname) + + if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_min_freq")); err != nil { + return err + } + ch <- prometheus.MustNewConstMetric(c.cpuFreqMin, prometheus.GaugeValue, float64(value), cpuname) + + if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_max_freq")); err != nil { + return err + } + ch <- prometheus.MustNewConstMetric(c.cpuFreqMax, prometheus.GaugeValue, float64(value), cpuname) + + if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle/core_throttle_count")); err != nil { + return err + } + ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname) + + if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle/package_throttle_count")); err != nil { + return err + } + ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, 
float64(value), cpuname) + } + + return nil +} + +// updateStat reads /proc/stat through procfs and exports cpu related metrics. +func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { + fs, err := procfs.NewFS(*procPath) + if err != nil { + return fmt.Errorf("failed to open procfs: %v", err) + } + stats, err := fs.NewStat() + if err != nil { + return err + } + + for cpuID, cpuStat := range stats.CPU { + cpuName := fmt.Sprintf("cpu%d", cpuID) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuName, "user") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuName, "nice") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuName, "system") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuName, "idle") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuName, "iowait") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuName, "irq") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuName, "softirq") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuName, "steal") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Guest, cpuName, "guest") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.GuestNice, cpuName, "guest_nice") + } + + return nil +} diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 4475584f..7d81c14e 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -208,6 +208,26 @@ node_cpu{cpu="cpu7",mode="softirq"} 0.31 node_cpu{cpu="cpu7",mode="steal"} 0 node_cpu{cpu="cpu7",mode="system"} 101.64 node_cpu{cpu="cpu7",mode="user"} 290.98 +# HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled. 
+# TYPE node_cpu_core_throttles_total counter +node_cpu_core_throttles_total{cpu="cpu0"} 5 +node_cpu_core_throttles_total{cpu="cpu1"} 0 +# HELP node_cpu_frequency_hertz Current cpu thread frequency in hertz. +# TYPE node_cpu_frequency_hertz gauge +node_cpu_frequency_hertz{cpu="cpu0"} 1.699981e+06 +node_cpu_frequency_hertz{cpu="cpu1"} 1.699981e+06 +# HELP node_cpu_frequency_max_hertz Maximum cpu thread frequency in hertz. +# TYPE node_cpu_frequency_max_hertz gauge +node_cpu_frequency_max_hertz{cpu="cpu0"} 3.7e+06 +node_cpu_frequency_max_hertz{cpu="cpu1"} 3.7e+06 +# HELP node_cpu_frequency_min_hertz Minimum cpu thread frequency in hertz. +# TYPE node_cpu_frequency_min_hertz gauge +node_cpu_frequency_min_hertz{cpu="cpu0"} 800000 +node_cpu_frequency_min_hertz{cpu="cpu1"} 800000 +# HELP node_cpu_package_throttles_total Number of times this cpu package has been throttled. +# TYPE node_cpu_package_throttles_total counter +node_cpu_package_throttles_total{cpu="cpu0"} 30 +node_cpu_package_throttles_total{cpu="cpu1"} 30 # HELP node_disk_bytes_read The total number of bytes read successfully. 
# TYPE node_disk_bytes_read counter node_disk_bytes_read{device="dm-0"} 5.13708655616e+11 @@ -2142,6 +2162,7 @@ node_scrape_collector_success{collector="arp"} 1 node_scrape_collector_success{collector="bonding"} 1 node_scrape_collector_success{collector="buddyinfo"} 1 node_scrape_collector_success{collector="conntrack"} 1 +node_scrape_collector_success{collector="cpu"} 1 node_scrape_collector_success{collector="diskstats"} 1 node_scrape_collector_success{collector="drbd"} 1 node_scrape_collector_success{collector="edac"} 1 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu0/cpufreq/scaling_cur_freq b/collector/fixtures/sys/bus/cpu/devices/cpu0/cpufreq/scaling_cur_freq new file mode 100644 index 00000000..10da2a3e --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu0/cpufreq/scaling_cur_freq @@ -0,0 +1 @@ +1699981 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu0/cpufreq/scaling_max_freq b/collector/fixtures/sys/bus/cpu/devices/cpu0/cpufreq/scaling_max_freq new file mode 100644 index 00000000..6aa0b685 --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu0/cpufreq/scaling_max_freq @@ -0,0 +1 @@ +3700000 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu0/cpufreq/scaling_min_freq b/collector/fixtures/sys/bus/cpu/devices/cpu0/cpufreq/scaling_min_freq new file mode 100644 index 00000000..959e88a8 --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu0/cpufreq/scaling_min_freq @@ -0,0 +1 @@ +800000 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu0/thermal_throttle/core_throttle_count b/collector/fixtures/sys/bus/cpu/devices/cpu0/thermal_throttle/core_throttle_count new file mode 100644 index 00000000..7ed6ff82 --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu0/thermal_throttle/core_throttle_count @@ -0,0 +1 @@ +5 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu0/thermal_throttle/package_throttle_count b/collector/fixtures/sys/bus/cpu/devices/cpu0/thermal_throttle/package_throttle_count new file mode 
100644 index 00000000..64bb6b74 --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu0/thermal_throttle/package_throttle_count @@ -0,0 +1 @@ +30 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu1/cpufreq/scaling_cur_freq b/collector/fixtures/sys/bus/cpu/devices/cpu1/cpufreq/scaling_cur_freq new file mode 100644 index 00000000..10da2a3e --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu1/cpufreq/scaling_cur_freq @@ -0,0 +1 @@ +1699981 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu1/cpufreq/scaling_max_freq b/collector/fixtures/sys/bus/cpu/devices/cpu1/cpufreq/scaling_max_freq new file mode 100644 index 00000000..6aa0b685 --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu1/cpufreq/scaling_max_freq @@ -0,0 +1 @@ +3700000 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu1/cpufreq/scaling_min_freq b/collector/fixtures/sys/bus/cpu/devices/cpu1/cpufreq/scaling_min_freq new file mode 100644 index 00000000..959e88a8 --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu1/cpufreq/scaling_min_freq @@ -0,0 +1 @@ +800000 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu1/thermal_throttle/core_throttle_count b/collector/fixtures/sys/bus/cpu/devices/cpu1/thermal_throttle/core_throttle_count new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu1/thermal_throttle/core_throttle_count @@ -0,0 +1 @@ +0 diff --git a/collector/fixtures/sys/bus/cpu/devices/cpu1/thermal_throttle/package_throttle_count b/collector/fixtures/sys/bus/cpu/devices/cpu1/thermal_throttle/package_throttle_count new file mode 100644 index 00000000..64bb6b74 --- /dev/null +++ b/collector/fixtures/sys/bus/cpu/devices/cpu1/thermal_throttle/package_throttle_count @@ -0,0 +1 @@ +30 diff --git a/collector/stat_linux.go b/collector/stat_linux.go index 00e13911..5e311089 100644 --- a/collector/stat_linux.go +++ b/collector/stat_linux.go @@ -16,18 +16,13 @@ package collector import ( - "bufio" - "os" - 
"strconv" - "strings" + "fmt" + + "github.com/prometheus/procfs" "github.com/prometheus/client_golang/prometheus" ) -const ( - userHz = 100 -) - type statCollector struct { cpu *prometheus.Desc intr *prometheus.Desc @@ -83,80 +78,25 @@ func NewStatCollector() (Collector, error) { }, nil } -// Expose kernel and system statistics. +// Update implements Collector and exposes kernel and system statistics. func (c *statCollector) Update(ch chan<- prometheus.Metric) error { - file, err := os.Open(procFilePath("stat")) + fs, err := procfs.NewFS(*procPath) + if err != nil { + return fmt.Errorf("failed to open procfs: %v", err) + } + stats, err := fs.NewStat() if err != nil { return err } - defer file.Close() - scanner := bufio.NewScanner(file) - for scanner.Scan() { - parts := strings.Fields(scanner.Text()) - if len(parts) == 0 { - continue - } - switch { - case strings.HasPrefix(parts[0], "cpu"): - // Export only per-cpu stats, it can be aggregated up in prometheus. - if parts[0] == "cpu" { - break - } - // Only some of these may be present, depending on kernel version. - cpuFields := []string{"user", "nice", "system", "idle", "iowait", "irq", "softirq", "steal", "guest", "guest_nice"} - // OpenVZ guests lack the "guest" CPU field, which needs to be ignored. - expectedFieldNum := len(cpuFields) + 1 - if expectedFieldNum > len(parts) { - expectedFieldNum = len(parts) - } - for i, v := range parts[1:expectedFieldNum] { - value, err := strconv.ParseFloat(v, 64) - if err != nil { - return err - } - // Convert from ticks to seconds - value /= userHz - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, value, parts[0], cpuFields[i]) - } - case parts[0] == "intr": - // Only expose the overall number, use the 'interrupts' collector for more detail. 
- value, err := strconv.ParseFloat(parts[1], 64) - if err != nil { - return err - } - ch <- prometheus.MustNewConstMetric(c.intr, prometheus.CounterValue, value) - case parts[0] == "ctxt": - value, err := strconv.ParseFloat(parts[1], 64) - if err != nil { - return err - } - ch <- prometheus.MustNewConstMetric(c.ctxt, prometheus.CounterValue, value) - case parts[0] == "processes": - value, err := strconv.ParseFloat(parts[1], 64) - if err != nil { - return err - } - ch <- prometheus.MustNewConstMetric(c.forks, prometheus.CounterValue, value) - case parts[0] == "btime": - value, err := strconv.ParseFloat(parts[1], 64) - if err != nil { - return err - } - ch <- prometheus.MustNewConstMetric(c.btime, prometheus.GaugeValue, value) - case parts[0] == "procs_running": - value, err := strconv.ParseFloat(parts[1], 64) - if err != nil { - return err - } - ch <- prometheus.MustNewConstMetric(c.procsRunning, prometheus.GaugeValue, value) - case parts[0] == "procs_blocked": - value, err := strconv.ParseFloat(parts[1], 64) - if err != nil { - return err - } - ch <- prometheus.MustNewConstMetric(c.procsBlocked, prometheus.GaugeValue, value) - } - } - return scanner.Err() + ch <- prometheus.MustNewConstMetric(c.intr, prometheus.CounterValue, float64(stats.IRQTotal)) + ch <- prometheus.MustNewConstMetric(c.ctxt, prometheus.CounterValue, float64(stats.ContextSwitches)) + ch <- prometheus.MustNewConstMetric(c.forks, prometheus.CounterValue, float64(stats.ProcessCreated)) + + ch <- prometheus.MustNewConstMetric(c.btime, prometheus.GaugeValue, float64(stats.BootTime)) + + ch <- prometheus.MustNewConstMetric(c.procsRunning, prometheus.GaugeValue, float64(stats.ProcessesRunning)) + ch <- prometheus.MustNewConstMetric(c.procsBlocked, prometheus.GaugeValue, float64(stats.ProcessesBlocked)) + + return nil } diff --git a/end-to-end-test.sh b/end-to-end-test.sh index cde2d857..49e6442f 100755 --- a/end-to-end-test.sh +++ b/end-to-end-test.sh @@ -6,6 +6,7 @@ collectors=$(cat << COLLECTORS arp 
buddyinfo conntrack + cpu diskstats drbd edac