Release 0.15.2 (#751)

* cpu: Support processor-less (memory-only) NUMA nodes (#734)

* cpu: Support processor-less (memory-only) NUMA nodes

Processor-less (memory-only) NUMA nodes exist, for example, in systems
that use Intel Optane drives for RAM expansion via Intel Memory Drive
Technology (IMDT).

IMDT RAM expansion supports two modes:

* "Unify Remote Memory domains" (the default): present a processor-less
  (memory-only) NUMA domain
* "Expand local memory domains": expand each processor’s memory domain
  with a portion of the memory made available by Optane and IMDT

This commit fixes a crash in the first case (when "cpulist" is empty).

Here's an example of such a system:

$ numastat -m|head -n5

Per-node system memory usage (in MBs):
                          Node 0          Node 1          Node 2           Total
                 --------------- --------------- --------------- ---------------
MemTotal               118239.56       130816.00       464384.00       713439.56

$ for i in {0..2}; do echo -n "$i: " ; cat /sys/bus/node/devices/node$i/cpulist ; done
0: 0-7,16-23
1: 8-15,24-31
2:

$ /opt/vsmp/bin/vsmpversion -vvv
Memory Drive Technology: 8.2.1455.74 (Sep 28 2017 13:09:59)
System configuration:
    Boards:      3
       1 x Proc. + I/O + Memory
       2 x NVM devices (Intel SSDPED1K375GAQ)
    Processors:  2, Cores: 16, Threads: 32
        Intel(R) Xeon(R) CPU E5-2667 v4 @ 3.20GHz Stepping 01
    Memory (MB): 713472 (of 977450), Cache: 251416, Private: 12562
       1 x 249088MB   [262036/   678/12270]
       1 x 232192MB   [357707/125369/  146]  82:00.0#1
       1 x 232192MB   [357707/125369/  146]  83:00.0#1

* cpu: rename some variables (pkg => node)

* cpu: Use %v not %q in log.Debugf() format strings

* Release v0.15.2
This commit is contained in:
Ben Kochie 2017-12-06 12:15:42 +01:00 committed by GitHub
parent 4a6d2cd4d8
commit 9a5e842b85
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 34 additions and 15 deletions

View file

@ -1,3 +1,7 @@
## v0.15.2 / 2017-12-06
* [BUGFIX] cpu: Support processor-less (memory-only) NUMA nodes #734
## v0.15.1 / 2017-11-07 ## v0.15.1 / 2017-11-07
* [BUGFIX] xfs: expose correct fields, fix metric names #708 * [BUGFIX] xfs: expose correct fields, fix metric names #708

View file

@ -1 +1 @@
0.15.1 0.15.2

View file

@ -111,7 +111,7 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
_, cpuname := filepath.Split(cpu) _, cpuname := filepath.Split(cpu)
if _, err := os.Stat(filepath.Join(cpu, "cpufreq")); os.IsNotExist(err) { if _, err := os.Stat(filepath.Join(cpu, "cpufreq")); os.IsNotExist(err) {
log.Debugf("CPU %q is missing cpufreq", cpu) log.Debugf("CPU %v is missing cpufreq", cpu)
} else { } else {
// sysfs cpufreq values are kHz, thus multiply by 1000 to export base units (hz). // sysfs cpufreq values are kHz, thus multiply by 1000 to export base units (hz).
// See https://www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt // See https://www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt
@ -132,7 +132,7 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
} }
if _, err := os.Stat(filepath.Join(cpu, "thermal_throttle")); os.IsNotExist(err) { if _, err := os.Stat(filepath.Join(cpu, "thermal_throttle")); os.IsNotExist(err) {
log.Debugf("CPU %q is missing thermal_throttle", cpu) log.Debugf("CPU %v is missing thermal_throttle", cpu)
continue continue
} }
if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err != nil { if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err != nil {
@ -141,36 +141,43 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname) ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname)
} }
pkgs, err := filepath.Glob(sysFilePath("bus/node/devices/node[0-9]*")) nodes, err := filepath.Glob(sysFilePath("bus/node/devices/node[0-9]*"))
if err != nil { if err != nil {
return err return err
} }
// package/node loop // package / NUMA node loop
for _, pkg := range pkgs { for _, node := range nodes {
if _, err := os.Stat(filepath.Join(pkg, "cpulist")); os.IsNotExist(err) { if _, err := os.Stat(filepath.Join(node, "cpulist")); os.IsNotExist(err) {
log.Debugf("package %q is missing cpulist", pkg) log.Debugf("NUMA node %v is missing cpulist", node)
continue continue
} }
cpulist, err := ioutil.ReadFile(filepath.Join(pkg, "cpulist")) cpulist, err := ioutil.ReadFile(filepath.Join(node, "cpulist"))
if err != nil { if err != nil {
log.Debugf("could not read cpulist of package %q", pkg) log.Debugf("could not read cpulist of NUMA node %v", node)
return err return err
} }
// cpulist example of one package/node with HT: "0-11,24-35" // cpulist example of one package/node with HT: "0-11,24-35"
line := strings.Split(string(cpulist), "\n")[0] line := strings.Split(string(cpulist), "\n")[0]
if line == "" {
// Skip processor-less (memory-only) NUMA nodes.
// E.g. RAM expansion with Intel Optane Drive(s) using
// Intel Memory Drive Technology (IMDT).
log.Debugf("skipping processor-less (memory-only) NUMA node %v", node)
continue
}
firstCPU := strings.FieldsFunc(line, func(r rune) bool { firstCPU := strings.FieldsFunc(line, func(r rune) bool {
return r == '-' || r == ',' return r == '-' || r == ','
})[0] })[0]
if _, err := os.Stat(filepath.Join(pkg, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); os.IsNotExist(err) { if _, err := os.Stat(filepath.Join(node, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); os.IsNotExist(err) {
log.Debugf("Package %q CPU %q is missing package_throttle", pkg, firstCPU) log.Debugf("Node %v CPU %v is missing package_throttle", node, firstCPU)
continue continue
} }
if value, err = readUintFromFile(filepath.Join(pkg, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); err != nil { if value, err = readUintFromFile(filepath.Join(node, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); err != nil {
return err return err
} }
pkgno := digitRegexp.FindAllString(pkg, 1)[0] nodeno := digitRegexp.FindAllString(node, 1)[0]
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), pkgno) ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), nodeno)
} }
return nil return nil

View file

@ -150,6 +150,14 @@ Mode: 644
Path: sys/bus/node/devices/node0/cpulist Path: sys/bus/node/devices/node0/cpulist
Lines: 1 Lines: 1
0-3 0-3
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/bus/node/devices/node1
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/bus/node/devices/node1/cpulist
Lines: 1
Mode: 644 Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/class Directory: sys/class