Add new per NUMA node memory statistics collector

It is sometimes useful to understand how free and occupied memory is
distributed between NUMA nodes when dealing with performance problems.
To make that possible, add a new meminfo_numa collector that exports
per-node statistics, along with unit and end-to-end tests for it.

Signed-off-by: Pavel Borzenkov <pavel.borzenkov@gmail.com>
This commit is contained in:
Pavel Borzenkov 2015-11-13 12:23:21 +03:00
parent fae388dcab
commit c12d8ea927
6 changed files with 365 additions and 0 deletions

View file

@ -643,6 +643,122 @@ node_memory_Writeback 0
# HELP node_memory_WritebackTmp Memory information field WritebackTmp.
# TYPE node_memory_WritebackTmp gauge
node_memory_WritebackTmp 0
# HELP node_memory_numa_Active Memory information field Active.
# TYPE node_memory_numa_Active gauge
node_memory_numa_Active{node="0"} 5.58733312e+09
node_memory_numa_Active{node="1"} 5.739003904e+09
# HELP node_memory_numa_Active_anon Memory information field Active_anon.
# TYPE node_memory_numa_Active_anon gauge
node_memory_numa_Active_anon{node="0"} 7.07915776e+08
node_memory_numa_Active_anon{node="1"} 6.04635136e+08
# HELP node_memory_numa_Active_file Memory information field Active_file.
# TYPE node_memory_numa_Active_file gauge
node_memory_numa_Active_file{node="0"} 4.879417344e+09
node_memory_numa_Active_file{node="1"} 5.134368768e+09
# HELP node_memory_numa_AnonHugePages Memory information field AnonHugePages.
# TYPE node_memory_numa_AnonHugePages gauge
node_memory_numa_AnonHugePages{node="0"} 1.50994944e+08
node_memory_numa_AnonHugePages{node="1"} 9.2274688e+07
# HELP node_memory_numa_AnonPages Memory information field AnonPages.
# TYPE node_memory_numa_AnonPages gauge
node_memory_numa_AnonPages{node="0"} 8.07112704e+08
node_memory_numa_AnonPages{node="1"} 6.88058368e+08
# HELP node_memory_numa_Bounce Memory information field Bounce.
# TYPE node_memory_numa_Bounce gauge
node_memory_numa_Bounce{node="0"} 0
node_memory_numa_Bounce{node="1"} 0
# HELP node_memory_numa_Dirty Memory information field Dirty.
# TYPE node_memory_numa_Dirty gauge
node_memory_numa_Dirty{node="0"} 20480
node_memory_numa_Dirty{node="1"} 122880
# HELP node_memory_numa_FilePages Memory information field FilePages.
# TYPE node_memory_numa_FilePages gauge
node_memory_numa_FilePages{node="0"} 7.1855017984e+10
node_memory_numa_FilePages{node="1"} 8.5585088512e+10
# HELP node_memory_numa_HugePages_Free Memory information field HugePages_Free.
# TYPE node_memory_numa_HugePages_Free gauge
node_memory_numa_HugePages_Free{node="0"} 0
node_memory_numa_HugePages_Free{node="1"} 0
# HELP node_memory_numa_HugePages_Surp Memory information field HugePages_Surp.
# TYPE node_memory_numa_HugePages_Surp gauge
node_memory_numa_HugePages_Surp{node="0"} 0
node_memory_numa_HugePages_Surp{node="1"} 0
# HELP node_memory_numa_HugePages_Total Memory information field HugePages_Total.
# TYPE node_memory_numa_HugePages_Total gauge
node_memory_numa_HugePages_Total{node="0"} 0
node_memory_numa_HugePages_Total{node="1"} 0
# HELP node_memory_numa_Inactive Memory information field Inactive.
# TYPE node_memory_numa_Inactive gauge
node_memory_numa_Inactive{node="0"} 6.0569788416e+10
node_memory_numa_Inactive{node="1"} 7.3165406208e+10
# HELP node_memory_numa_Inactive_anon Memory information field Inactive_anon.
# TYPE node_memory_numa_Inactive_anon gauge
node_memory_numa_Inactive_anon{node="0"} 3.48626944e+08
node_memory_numa_Inactive_anon{node="1"} 2.91930112e+08
# HELP node_memory_numa_Inactive_file Memory information field Inactive_file.
# TYPE node_memory_numa_Inactive_file gauge
node_memory_numa_Inactive_file{node="0"} 6.0221161472e+10
node_memory_numa_Inactive_file{node="1"} 7.2873476096e+10
# HELP node_memory_numa_KernelStack Memory information field KernelStack.
# TYPE node_memory_numa_KernelStack gauge
node_memory_numa_KernelStack{node="0"} 3.4832384e+07
node_memory_numa_KernelStack{node="1"} 3.1850496e+07
# HELP node_memory_numa_Mapped Memory information field Mapped.
# TYPE node_memory_numa_Mapped gauge
node_memory_numa_Mapped{node="0"} 9.1570176e+08
node_memory_numa_Mapped{node="1"} 8.84850688e+08
# HELP node_memory_numa_MemFree Memory information field MemFree.
# TYPE node_memory_numa_MemFree gauge
node_memory_numa_MemFree{node="0"} 5.4303100928e+10
node_memory_numa_MemFree{node="1"} 4.0586022912e+10
# HELP node_memory_numa_MemTotal Memory information field MemTotal.
# TYPE node_memory_numa_MemTotal gauge
node_memory_numa_MemTotal{node="0"} 1.3740271616e+11
node_memory_numa_MemTotal{node="1"} 1.37438953472e+11
# HELP node_memory_numa_MemUsed Memory information field MemUsed.
# TYPE node_memory_numa_MemUsed gauge
node_memory_numa_MemUsed{node="0"} 8.3099615232e+10
node_memory_numa_MemUsed{node="1"} 9.685293056e+10
# HELP node_memory_numa_Mlocked Memory information field Mlocked.
# TYPE node_memory_numa_Mlocked gauge
node_memory_numa_Mlocked{node="0"} 0
node_memory_numa_Mlocked{node="1"} 0
# HELP node_memory_numa_NFS_Unstable Memory information field NFS_Unstable.
# TYPE node_memory_numa_NFS_Unstable gauge
node_memory_numa_NFS_Unstable{node="0"} 0
node_memory_numa_NFS_Unstable{node="1"} 0
# HELP node_memory_numa_PageTables Memory information field PageTables.
# TYPE node_memory_numa_PageTables gauge
node_memory_numa_PageTables{node="0"} 1.46743296e+08
node_memory_numa_PageTables{node="1"} 1.27254528e+08
# HELP node_memory_numa_SReclaimable Memory information field SReclaimable.
# TYPE node_memory_numa_SReclaimable gauge
node_memory_numa_SReclaimable{node="0"} 4.580478976e+09
node_memory_numa_SReclaimable{node="1"} 4.724822016e+09
# HELP node_memory_numa_SUnreclaim Memory information field SUnreclaim.
# TYPE node_memory_numa_SUnreclaim gauge
node_memory_numa_SUnreclaim{node="0"} 2.23352832e+09
node_memory_numa_SUnreclaim{node="1"} 2.464391168e+09
# HELP node_memory_numa_Shmem Memory information field Shmem.
# TYPE node_memory_numa_Shmem gauge
node_memory_numa_Shmem{node="0"} 4.900864e+07
node_memory_numa_Shmem{node="1"} 8.968192e+07
# HELP node_memory_numa_Slab Memory information field Slab.
# TYPE node_memory_numa_Slab gauge
node_memory_numa_Slab{node="0"} 6.814007296e+09
node_memory_numa_Slab{node="1"} 7.189213184e+09
# HELP node_memory_numa_Unevictable Memory information field Unevictable.
# TYPE node_memory_numa_Unevictable gauge
node_memory_numa_Unevictable{node="0"} 0
node_memory_numa_Unevictable{node="1"} 0
# HELP node_memory_numa_Writeback Memory information field Writeback.
# TYPE node_memory_numa_Writeback gauge
node_memory_numa_Writeback{node="0"} 0
node_memory_numa_Writeback{node="1"} 0
# HELP node_memory_numa_WritebackTmp Memory information field WritebackTmp.
# TYPE node_memory_numa_WritebackTmp gauge
node_memory_numa_WritebackTmp{node="0"} 0
node_memory_numa_WritebackTmp{node="1"} 0
# HELP node_net_bonding_slaves Number of configured slaves per bonding interface.
# TYPE node_net_bonding_slaves gauge
node_net_bonding_slaves{master="bond0"} 0

View file

@ -0,0 +1,29 @@
Node 0 MemTotal: 134182340 kB
Node 0 MemFree: 53030372 kB
Node 0 MemUsed: 81151968 kB
Node 0 Active: 5456380 kB
Node 0 Inactive: 59150184 kB
Node 0 Active(anon): 691324 kB
Node 0 Inactive(anon): 340456 kB
Node 0 Active(file): 4765056 kB
Node 0 Inactive(file): 58809728 kB
Node 0 Unevictable: 0 kB
Node 0 Mlocked: 0 kB
Node 0 Dirty: 20 kB
Node 0 Writeback: 0 kB
Node 0 FilePages: 70170916 kB
Node 0 Mapped: 894240 kB
Node 0 AnonPages: 788196 kB
Node 0 Shmem: 47860 kB
Node 0 KernelStack: 34016 kB
Node 0 PageTables: 143304 kB
Node 0 NFS_Unstable: 0 kB
Node 0 Bounce: 0 kB
Node 0 WritebackTmp: 0 kB
Node 0 Slab: 6654304 kB
Node 0 SReclaimable: 4473124 kB
Node 0 SUnreclaim: 2181180 kB
Node 0 AnonHugePages: 147456 kB
Node 0 HugePages_Total: 0
Node 0 HugePages_Free: 0
Node 0 HugePages_Surp: 0

View file

@ -0,0 +1,29 @@
Node 1 MemTotal: 134217728 kB
Node 1 MemFree: 39634788 kB
Node 1 MemUsed: 94582940 kB
Node 1 Active: 5604496 kB
Node 1 Inactive: 71450592 kB
Node 1 Active(anon): 590464 kB
Node 1 Inactive(anon): 285088 kB
Node 1 Active(file): 5014032 kB
Node 1 Inactive(file): 71165504 kB
Node 1 Unevictable: 0 kB
Node 1 Mlocked: 0 kB
Node 1 Dirty: 120 kB
Node 1 Writeback: 0 kB
Node 1 FilePages: 83579188 kB
Node 1 Mapped: 864112 kB
Node 1 AnonPages: 671932 kB
Node 1 Shmem: 87580 kB
Node 1 KernelStack: 31104 kB
Node 1 PageTables: 124272 kB
Node 1 NFS_Unstable: 0 kB
Node 1 Bounce: 0 kB
Node 1 WritebackTmp: 0 kB
Node 1 Slab: 7020716 kB
Node 1 SReclaimable: 4614084 kB
Node 1 SUnreclaim: 2406632 kB
Node 1 AnonHugePages: 90112 kB
Node 1 HugePages_Total: 0
Node 1 HugePages_Free: 0
Node 1 HugePages_Surp: 0

View file

@ -0,0 +1,131 @@
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build !nomeminfo_numa
package collector
import (
"bufio"
"fmt"
"io"
"os"
"path"
"path/filepath"
"regexp"
"strconv"
"strings"
"github.com/prometheus/client_golang/prometheus"
)
// memInfoNumaSubsystem is the subsystem part of the fully-qualified metric
// name built for every per-node memory metric.
const memInfoNumaSubsystem = "memory_numa"
// meminfoKey identifies a single parsed value: one memory field on one
// NUMA node. It is used as the map key for parsed statistics.
type meminfoKey struct {
	// metricName is the sanitized field name, e.g. "Active_anon".
	metricName string
	// numaNode is the node identifier as it appears in the meminfo line.
	numaNode string
}
// meminfoNumaCollector exports per NUMA node memory statistics.
type meminfoNumaCollector struct {
// metricDescs caches one descriptor per metric name; entries are
// created lazily by Update the first time a field is seen.
metricDescs map[string]*prometheus.Desc
}
// init registers the collector constructor in Factories under the name
// "meminfo_numa".
func init() {
Factories["meminfo_numa"] = NewMeminfoNumaCollector
}
// NewMeminfoNumaCollector returns a new Collector exposing per NUMA node
// memory stats. The collector starts with an empty descriptor cache and the
// returned error is always nil.
func NewMeminfoNumaCollector() (Collector, error) {
	c := &meminfoNumaCollector{
		metricDescs: make(map[string]*prometheus.Desc),
	}
	return c, nil
}
// Update gathers the per NUMA node memory statistics and sends one gauge per
// (field, node) pair on ch, labelled with the node identifier. Descriptors
// are created on first use and cached by metric name for later calls.
// It returns an error if the statistics cannot be read.
func (c *meminfoNumaCollector) Update(ch chan<- prometheus.Metric) (err error) {
	stats, err := getMemInfoNuma()
	if err != nil {
		return fmt.Errorf("couldn't get NUMA meminfo: %s", err)
	}

	for key, value := range stats {
		name := key.metricName
		// Build the descriptor only the first time this field is seen.
		if _, known := c.metricDescs[name]; !known {
			c.metricDescs[name] = prometheus.NewDesc(
				prometheus.BuildFQName(Namespace, memInfoNumaSubsystem, name),
				fmt.Sprintf("Memory information field %s.", name),
				[]string{"node"}, nil)
		}
		ch <- prometheus.MustNewConstMetric(c.metricDescs[name], prometheus.GaugeValue, value, key.numaNode)
	}

	return nil
}
// getMemInfoNuma collects memory statistics for every NUMA node found under
// devices/system/node in sysfs and merges them into a single map keyed by
// (metric name, node). It returns an error as soon as any node's meminfo
// file cannot be opened or parsed.
func getMemInfoNuma() (map[meminfoKey]float64, error) {
	info := make(map[meminfoKey]float64)

	nodes, err := filepath.Glob(sysFilePath("devices/system/node/node[0-9]*"))
	if err != nil {
		return nil, err
	}

	for _, node := range nodes {
		numaInfo, err := readMemInfoNumaFile(path.Join(node, "meminfo"))
		if err != nil {
			return nil, err
		}
		for k, v := range numaInfo {
			info[k] = v
		}
	}

	return info, nil
}

// readMemInfoNumaFile opens and parses a single node's meminfo file. It is a
// separate function so the file is closed as soon as it has been parsed,
// rather than every node's file staying open until all nodes are processed
// (which is what a defer inside the caller's loop would do).
func readMemInfoNumaFile(filename string) (map[meminfoKey]float64, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	return parseMemInfoNuma(file)
}
// meminfoNumaParensRE matches a parenthesised suffix in a field name, such as
// the "(anon)" in "Active(anon)". It is compiled once instead of per call.
var meminfoNumaParensRE = regexp.MustCompile(`\((.*)\)`)

// parseMemInfoNuma parses the contents of a per-node meminfo file read from r.
//
// Each non-blank line is expected to look like
//
//	Node <id> <Field>: <value> [kB]
//
// Values carrying the "kB" unit are multiplied by 1024; values without a
// unit are kept as they are. A parenthesised part of the field name is turned
// into an underscore suffix (Active(anon) -> Active_anon). The result is keyed
// by (field name, node id).
//
// Blank lines are skipped. An error is returned for lines with too few or too
// many fields, an unknown unit, a value that is not a number, or a read
// failure on r.
func parseMemInfoNuma(r io.Reader) (map[meminfoKey]float64, error) {
	var (
		memInfo = map[meminfoKey]float64{}
		scanner = bufio.NewScanner(r)
	)

	for scanner.Scan() {
		line := scanner.Text()
		parts := strings.Fields(line)
		if len(parts) == 0 {
			// Tolerate empty lines instead of indexing past the slice.
			continue
		}
		if len(parts) < 4 {
			// Not enough fields to hold a node id, a name and a value.
			return nil, fmt.Errorf("invalid line in meminfo: %s", line)
		}

		fv, err := strconv.ParseFloat(parts[3], 64)
		if err != nil {
			return nil, fmt.Errorf("invalid value in meminfo: %s", err)
		}

		switch l := len(parts); {
		case l == 4: // no unit
		case l == 5 && parts[4] == "kB": // has unit
			fv *= 1024
		default:
			return nil, fmt.Errorf("invalid line in meminfo: %s", line)
		}

		metric := strings.TrimRight(parts[2], ":")

		// Active(anon) -> Active_anon
		metric = meminfoNumaParensRE.ReplaceAllString(metric, "_${1}")
		memInfo[meminfoKey{metric, parts[1]}] = fv
	}
	// Scan returns false on both EOF and read errors; tell them apart here so
	// a truncated read is not reported as a successful parse.
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	return memInfo, nil
}

View file

@ -0,0 +1,59 @@
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"os"
"testing"
)
func TestMemInfoNuma(t *testing.T) {
file, err := os.Open("fixtures/sys/devices/system/node/node0/meminfo")
if err != nil {
t.Fatal(err)
}
defer file.Close()
memInfo, err := parseMemInfoNuma(file)
if err != nil {
t.Fatal(err)
}
if want, got := 707915776.0, memInfo[meminfoKey{"Active_anon", "0"}]; want != got {
t.Errorf("want memory Active(anon) %f, got %f", want, got)
}
if want, got := 150994944.0, memInfo[meminfoKey{"AnonHugePages", "0"}]; want != got {
t.Errorf("want memory AnonHugePages %f, got %f", want, got)
}
file, err = os.Open("fixtures/sys/devices/system/node/node1/meminfo")
if err != nil {
t.Fatal(err)
}
defer file.Close()
memInfo, err = parseMemInfoNuma(file)
if err != nil {
t.Fatal(err)
}
if want, got := 291930112.0, memInfo[meminfoKey{"Inactive_anon", "1"}]; want != got {
t.Errorf("want memory Inactive(anon) %f, got %f", want, got)
}
if want, got := 85585088512.0, memInfo[meminfoKey{"FilePages", "1"}]; want != got {
t.Errorf("want memory FilePages %f, got %f", want, got)
}
}

View file

@ -10,6 +10,7 @@ collectors=$(cat << COLLECTORS
loadavg
mdadm
meminfo
meminfo_numa
netdev
netstat
sockstat