From eac3e30f7f7f564c2bd110c7bb97390711e45e32 Mon Sep 17 00:00:00 2001 From: Ukri Niemimuukko Date: Fri, 17 Jan 2020 14:32:16 +0200 Subject: [PATCH] rapl_linux collector This exposes RAPL statistics from /sys/class/powercap. Co-Authored-By: Ben Kochie Signed-off-by: Ukri Niemimuukko --- CHANGELOG.md | 1 + README.md | 1 + collector/fixtures/e2e-64k-page-output.txt | 7 ++ collector/fixtures/e2e-output.txt | 7 ++ collector/fixtures/sys.ttar | 128 +++++++++++++++++++++ collector/rapl_linux.go | 77 +++++++++++++ end-to-end-test.sh | 1 + 7 files changed, 222 insertions(+) create mode 100644 collector/rapl_linux.go diff --git a/CHANGELOG.md b/CHANGELOG.md index a640db2a..19d3cfff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ * [FEATURE] Add new thermal_zone collector #1425 * [FEATURE] Add new cooling_device metrics to thermal zone collector #1445 * [FEATURE] Add new softnet collector #1576 +* [FEATURE] Add RAPL collector #1523 * [ENHANCEMENT] Collect InfiniBand port state and physical state #1357 * [ENHANCEMENT] Include additional XFS runtime statistics. #1423 * [ENHANCEMENT] Report non-fatal collection errors in the exporter metric. #1439 diff --git a/README.md b/README.md index b8bb283f..0a0f7c36 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,7 @@ netstat | Exposes network statistics from `/proc/net/netstat`. This is the same nfs | Exposes NFS client statistics from `/proc/net/rpc/nfs`. This is the same information as `nfsstat -c`. | Linux nfsd | Exposes NFS kernel server statistics from `/proc/net/rpc/nfsd`. This is the same information as `nfsstat -s`. | Linux pressure | Exposes pressure stall statistics from `/proc/pressure/`. | Linux (kernel 4.20+ and/or [CONFIG\_PSI](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/accounting/psi.txt)) +rapl | Exposes various statistics from `/sys/class/powercap`. | Linux schedstat | Exposes task scheduler statistics from `/proc/schedstat`. 
| Linux sockstat | Exposes various statistics from `/proc/net/sockstat`. | Linux softnet | Exposes statistics from `/proc/net/softnet_stat`. | Linux diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 0e8e1e2b..f6a02c40 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -2498,6 +2498,12 @@ node_qdisc_packets_total{device="wlan0",kind="fq"} 42 # TYPE node_qdisc_requeues_total counter node_qdisc_requeues_total{device="eth0",kind="pfifo_fast"} 2 node_qdisc_requeues_total{device="wlan0",kind="fq"} 1 +# HELP node_rapl_core_joules_total Current RAPL core value in joules +# TYPE node_rapl_core_joules_total counter +node_rapl_core_joules_total{index="0"} 118821.284256 +# HELP node_rapl_package_joules_total Current RAPL package value in joules +# TYPE node_rapl_package_joules_total counter +node_rapl_package_joules_total{index="0"} 240422.366267 # HELP node_schedstat_running_seconds_total Number of seconds CPU spent running a process. 
# TYPE node_schedstat_running_seconds_total counter node_schedstat_running_seconds_total{cpu="0"} 2.045936778163039e+06 @@ -2545,6 +2551,7 @@ node_scrape_collector_success{collector="powersupplyclass"} 1 node_scrape_collector_success{collector="pressure"} 1 node_scrape_collector_success{collector="processes"} 1 node_scrape_collector_success{collector="qdisc"} 1 +node_scrape_collector_success{collector="rapl"} 1 node_scrape_collector_success{collector="schedstat"} 1 node_scrape_collector_success{collector="sockstat"} 1 node_scrape_collector_success{collector="softnet"} 1 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 463de433..1ba311fe 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -2519,6 +2519,12 @@ node_qdisc_packets_total{device="wlan0",kind="fq"} 42 # TYPE node_qdisc_requeues_total counter node_qdisc_requeues_total{device="eth0",kind="pfifo_fast"} 2 node_qdisc_requeues_total{device="wlan0",kind="fq"} 1 +# HELP node_rapl_core_joules_total Current RAPL core value in joules +# TYPE node_rapl_core_joules_total counter +node_rapl_core_joules_total{index="0"} 118821.284256 +# HELP node_rapl_package_joules_total Current RAPL package value in joules +# TYPE node_rapl_package_joules_total counter +node_rapl_package_joules_total{index="0"} 240422.366267 # HELP node_schedstat_running_seconds_total Number of seconds CPU spent running a process. 
# TYPE node_schedstat_running_seconds_total counter node_schedstat_running_seconds_total{cpu="0"} 2.045936778163039e+06 @@ -2566,6 +2572,7 @@ node_scrape_collector_success{collector="powersupplyclass"} 1 node_scrape_collector_success{collector="pressure"} 1 node_scrape_collector_success{collector="processes"} 1 node_scrape_collector_success{collector="qdisc"} 1 +node_scrape_collector_success{collector="rapl"} 1 node_scrape_collector_success{collector="schedstat"} 1 node_scrape_collector_success{collector="sockstat"} 1 node_scrape_collector_success{collector="softnet"} 1 diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index 3704b332..47610fe9 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -1274,6 +1274,134 @@ Lines: 1 11660000 Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/class/powercap +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/class/powercap/intel-rapl +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl/enabled +Lines: 1 +1 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl/uevent +Lines: 0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/class/powercap/intel-rapl:0 +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/constraint_0_max_power_uw +Lines: 1 +95000000 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/constraint_0_name +Lines: 1 +long_term +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/constraint_0_power_limit_uw +Lines: 1 +4090000000 
+Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/constraint_0_time_window_us +Lines: 1 +999424 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/constraint_1_max_power_uw +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/constraint_1_name +Lines: 1 +short_term +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/constraint_1_power_limit_uw +Lines: 1 +4090000000 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/constraint_1_time_window_us +Lines: 1 +2440 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/enabled +Lines: 1 +1 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/energy_uj +Lines: 1 +240422366267 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/max_energy_range_uj +Lines: 1 +262143328850 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/name +Lines: 1 +package-0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0/uevent +Lines: 0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/class/powercap/intel-rapl:0:0 +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0:0/constraint_0_max_power_uw +Lines: 0 +Mode: 644 +# ttar - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0:0/constraint_0_name +Lines: 1 +long_term +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0:0/constraint_0_power_limit_uw +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0:0/constraint_0_time_window_us +Lines: 1 +976 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0:0/enabled +Lines: 1 +0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0:0/energy_uj +Lines: 1 +118821284256 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0:0/max_energy_range_uj +Lines: 1 +262143328850 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0:0/name +Lines: 1 +core +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/powercap/intel-rapl:0:0/uevent +Lines: 0 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/class/thermal Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/collector/rapl_linux.go b/collector/rapl_linux.go new file mode 100644 index 00000000..25498c94 --- /dev/null +++ b/collector/rapl_linux.go @@ -0,0 +1,77 @@ +// Copyright 2019 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build !norapl
+
+package collector
+
+import (
+	"strconv"
+
+	"github.com/go-kit/kit/log"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/procfs/sysfs"
+)
+
+type raplCollector struct {
+	fs sysfs.FS // sysfs handle opened on *sysPath; Update passes it to sysfs.GetRaplZones
+}
+
+func init() {
+	registerCollector("rapl", defaultEnabled, NewRaplCollector) // registered under the name "rapl" with the defaultEnabled setting
+}
+
+// NewRaplCollector returns a new Collector exposing RAPL metrics, or the error from sysfs.NewFS(*sysPath); logger is accepted but not used.
+func NewRaplCollector(logger log.Logger) (Collector, error) {
+	fs, err := sysfs.NewFS(*sysPath)
+
+	if err != nil {
+		return nil, err
+	}
+
+	collector := raplCollector{
+		fs: fs,
+	}
+	return &collector, nil
+}
+
+// Update implements Collector and exposes RAPL related metrics: one counter per zone, in joules, labelled with the zone index.
+func (c *raplCollector) Update(ch chan<- prometheus.Metric) error {
+	// nil zones are fine when platform doesn't have powercap files present, so any GetRaplZones error yields no metrics and a nil return. NOTE(review): this also hides every other failure of GetRaplZones - confirm that is intended.
+	zones, err := sysfs.GetRaplZones(c.fs)
+	if err != nil {
+		return nil
+	}
+
+	for _, rz := range zones {
+		newMicrojoules, err := rz.GetEnergyMicrojoules()
+		if err != nil {
+			return err // stop at the first unreadable zone; the remaining zones are not reported
+		}
+		index := strconv.Itoa(rz.Index)
+
+		descriptor := prometheus.NewDesc( // rebuilt for every zone on every call because the metric name embeds rz.Name
+			prometheus.BuildFQName(namespace, "rapl", rz.Name+"_joules_total"),
+			"Current RAPL "+rz.Name+" value in joules",
+			[]string{"index"}, nil,
+		)
+
+		ch <- prometheus.MustNewConstMetric(
+			descriptor,
+			prometheus.CounterValue,
+			float64(newMicrojoules)/1000000.0, // microjoules -> joules
+			index,
+		)
+	}
+	return nil
+}
diff --git a/end-to-end-test.sh b/end-to-end-test.sh
index 73729401..6d0bce35 100755
--- a/end-to-end-test.sh
+++ b/end-to-end-test.sh
@@ -30,6 +30,7 @@ enabled_collectors=$(cat << COLLECTORS
   nfsd
   pressure
   qdisc
+  rapl
   schedstat
   sockstat
   stat