mirror of
https://github.com/prometheus/node_exporter.git
synced 2025-03-05 21:00:12 -08:00
Add Linux NUMA "numastat" metrics (#249)
* Add Linux NUMA "numastat" metrics
  Read the `numastat` metrics from /sys/devices/system/node/node* when reading NUMA meminfo metrics.
* Update end-to-end test output.
* Add `numastat` metrics as counters.
* Add tests for error conditions.
* Refactor meminfo numa metrics struct
* Refactor meminfoKey into a simple struct of metric data. This makes it easier to pass slices of metrics around.
* Refactor tests.
* Fixup: Add suggested fixes.
* Fixup: More fixes
* Add another scanner.Err() return
* Add "_total" to counter metrics.
This commit is contained in:
parent
081ecc5db0
commit
c6162312f2
|
@ -1011,6 +1011,30 @@ node_memory_numa_Writeback{node="1"} 0
|
||||||
# TYPE node_memory_numa_WritebackTmp gauge
|
# TYPE node_memory_numa_WritebackTmp gauge
|
||||||
node_memory_numa_WritebackTmp{node="0"} 0
|
node_memory_numa_WritebackTmp{node="0"} 0
|
||||||
node_memory_numa_WritebackTmp{node="1"} 0
|
node_memory_numa_WritebackTmp{node="1"} 0
|
||||||
|
# HELP node_memory_numa_interleave_hit_total Memory information field interleave_hit_total.
|
||||||
|
# TYPE node_memory_numa_interleave_hit_total counter
|
||||||
|
node_memory_numa_interleave_hit_total{node="0"} 57146
|
||||||
|
node_memory_numa_interleave_hit_total{node="1"} 57286
|
||||||
|
# HELP node_memory_numa_local_node_total Memory information field local_node_total.
|
||||||
|
# TYPE node_memory_numa_local_node_total counter
|
||||||
|
node_memory_numa_local_node_total{node="0"} 1.93454780853e+11
|
||||||
|
node_memory_numa_local_node_total{node="1"} 3.2671904655e+11
|
||||||
|
# HELP node_memory_numa_numa_foreign_total Memory information field numa_foreign_total.
|
||||||
|
# TYPE node_memory_numa_numa_foreign_total counter
|
||||||
|
node_memory_numa_numa_foreign_total{node="0"} 5.98586233e+10
|
||||||
|
node_memory_numa_numa_foreign_total{node="1"} 1.2624528e+07
|
||||||
|
# HELP node_memory_numa_numa_hit_total Memory information field numa_hit_total.
|
||||||
|
# TYPE node_memory_numa_numa_hit_total counter
|
||||||
|
node_memory_numa_numa_hit_total{node="0"} 1.93460335812e+11
|
||||||
|
node_memory_numa_numa_hit_total{node="1"} 3.26720946761e+11
|
||||||
|
# HELP node_memory_numa_numa_miss_total Memory information field numa_miss_total.
|
||||||
|
# TYPE node_memory_numa_numa_miss_total counter
|
||||||
|
node_memory_numa_numa_miss_total{node="0"} 1.2624528e+07
|
||||||
|
node_memory_numa_numa_miss_total{node="1"} 5.9858626709e+10
|
||||||
|
# HELP node_memory_numa_other_node_total Memory information field other_node_total.
|
||||||
|
# TYPE node_memory_numa_other_node_total counter
|
||||||
|
node_memory_numa_other_node_total{node="0"} 1.8179487e+07
|
||||||
|
node_memory_numa_other_node_total{node="1"} 5.986052692e+10
|
||||||
# HELP node_net_bonding_slaves Number of configured slaves per bonding interface.
|
# HELP node_net_bonding_slaves Number of configured slaves per bonding interface.
|
||||||
# TYPE node_net_bonding_slaves gauge
|
# TYPE node_net_bonding_slaves gauge
|
||||||
node_net_bonding_slaves{master="bond0"} 0
|
node_net_bonding_slaves{master="bond0"} 0
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
numa_hit 193460335812
|
||||||
|
numa_miss 12624528
|
||||||
|
numa_foreign 59858623300
|
||||||
|
interleave_hit 57146
|
||||||
|
local_node 193454780853
|
||||||
|
other_node 18179487
|
|
@ -0,0 +1,6 @@
|
||||||
|
numa_hit 326720946761
|
||||||
|
numa_miss 59858626709
|
||||||
|
numa_foreign 12624528
|
||||||
|
interleave_hit 57286
|
||||||
|
local_node 326719046550
|
||||||
|
other_node 59860526920
|
|
@ -33,8 +33,13 @@ const (
|
||||||
memInfoNumaSubsystem = "memory_numa"
|
memInfoNumaSubsystem = "memory_numa"
|
||||||
)
|
)
|
||||||
|
|
||||||
type meminfoKey struct {
|
var meminfoNodeRE = regexp.MustCompile(`.*devices/system/node/node([0-9]*)`)
|
||||||
metricName, numaNode string
|
|
||||||
|
type meminfoMetric struct {
|
||||||
|
metricName string
|
||||||
|
metricType prometheus.ValueType
|
||||||
|
numaNode string
|
||||||
|
value float64
|
||||||
}
|
}
|
||||||
|
|
||||||
type meminfoNumaCollector struct {
|
type meminfoNumaCollector struct {
|
||||||
|
@ -54,53 +59,70 @@ func NewMeminfoNumaCollector() (Collector, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *meminfoNumaCollector) Update(ch chan<- prometheus.Metric) (err error) {
|
func (c *meminfoNumaCollector) Update(ch chan<- prometheus.Metric) (err error) {
|
||||||
memInfoNuma, err := getMemInfoNuma()
|
metrics, err := getMemInfoNuma()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("couldn't get NUMA meminfo: %s", err)
|
return fmt.Errorf("couldn't get NUMA meminfo: %s", err)
|
||||||
}
|
}
|
||||||
for k, v := range memInfoNuma {
|
for _, v := range metrics {
|
||||||
desc, ok := c.metricDescs[k.metricName]
|
desc, ok := c.metricDescs[v.metricName]
|
||||||
if !ok {
|
if !ok {
|
||||||
desc = prometheus.NewDesc(
|
desc = prometheus.NewDesc(
|
||||||
prometheus.BuildFQName(Namespace, memInfoNumaSubsystem, k.metricName),
|
prometheus.BuildFQName(Namespace, memInfoNumaSubsystem, v.metricName),
|
||||||
fmt.Sprintf("Memory information field %s.", k.metricName),
|
fmt.Sprintf("Memory information field %s.", v.metricName),
|
||||||
[]string{"node"}, nil)
|
[]string{"node"}, nil)
|
||||||
c.metricDescs[k.metricName] = desc
|
c.metricDescs[v.metricName] = desc
|
||||||
}
|
}
|
||||||
ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, v, k.numaNode)
|
ch <- prometheus.MustNewConstMetric(desc, v.metricType, v.value, v.numaNode)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMemInfoNuma() (map[meminfoKey]float64, error) {
|
func getMemInfoNuma() ([]meminfoMetric, error) {
|
||||||
info := make(map[meminfoKey]float64)
|
var (
|
||||||
|
metrics []meminfoMetric
|
||||||
|
)
|
||||||
|
|
||||||
nodes, err := filepath.Glob(sysFilePath("devices/system/node/node[0-9]*"))
|
nodes, err := filepath.Glob(sysFilePath("devices/system/node/node[0-9]*"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
for _, node := range nodes {
|
for _, node := range nodes {
|
||||||
file, err := os.Open(path.Join(node, "meminfo"))
|
meminfoFile, err := os.Open(path.Join(node, "meminfo"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer file.Close()
|
defer meminfoFile.Close()
|
||||||
|
|
||||||
numaInfo, err := parseMemInfoNuma(file)
|
numaInfo, err := parseMemInfoNuma(meminfoFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
for k, v := range numaInfo {
|
metrics = append(metrics, numaInfo...)
|
||||||
info[k] = v
|
|
||||||
|
numastatFile, err := os.Open(path.Join(node, "numastat"))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
defer numastatFile.Close()
|
||||||
|
|
||||||
|
nodeNumber := meminfoNodeRE.FindStringSubmatch(node)
|
||||||
|
if nodeNumber == nil {
|
||||||
|
return nil, fmt.Errorf("device node string didn't match regexp: %s", node)
|
||||||
}
|
}
|
||||||
|
|
||||||
return info, nil
|
numaStat, err := parseMemInfoNumaStat(numastatFile, nodeNumber[1])
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
metrics = append(metrics, numaStat...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseMemInfoNuma(r io.Reader) (map[meminfoKey]float64, error) {
|
return metrics, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseMemInfoNuma(r io.Reader) ([]meminfoMetric, error) {
|
||||||
var (
|
var (
|
||||||
memInfo = map[meminfoKey]float64{}
|
memInfo []meminfoMetric
|
||||||
scanner = bufio.NewScanner(r)
|
scanner = bufio.NewScanner(r)
|
||||||
re = regexp.MustCompile("\\((.*)\\)")
|
re = regexp.MustCompile("\\((.*)\\)")
|
||||||
)
|
)
|
||||||
|
@ -127,8 +149,34 @@ func parseMemInfoNuma(r io.Reader) (map[meminfoKey]float64, error) {
|
||||||
|
|
||||||
// Active(anon) -> Active_anon
|
// Active(anon) -> Active_anon
|
||||||
metric = re.ReplaceAllString(metric, "_${1}")
|
metric = re.ReplaceAllString(metric, "_${1}")
|
||||||
memInfo[meminfoKey{metric, parts[1]}] = fv
|
memInfo = append(memInfo, meminfoMetric{metric, prometheus.GaugeValue, parts[1], fv})
|
||||||
}
|
}
|
||||||
|
|
||||||
return memInfo, nil
|
return memInfo, scanner.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseMemInfoNumaStat(r io.Reader, nodeNumber string) ([]meminfoMetric, error) {
|
||||||
|
var (
|
||||||
|
numaStat []meminfoMetric
|
||||||
|
scanner = bufio.NewScanner(r)
|
||||||
|
)
|
||||||
|
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := strings.TrimSpace(scanner.Text())
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
parts := strings.Fields(string(line))
|
||||||
|
if len(parts) != 2 {
|
||||||
|
return nil, fmt.Errorf("line scan did not return 2 fields: %s", line)
|
||||||
|
}
|
||||||
|
|
||||||
|
fv, err := strconv.ParseFloat(parts[1], 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid value in numastat: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
numaStat = append(numaStat, meminfoMetric{parts[0] + "_total", prometheus.CounterValue, nodeNumber, fv})
|
||||||
|
}
|
||||||
|
return numaStat, scanner.Err()
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,11 +30,15 @@ func TestMemInfoNuma(t *testing.T) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if want, got := 707915776.0, memInfo[meminfoKey{"Active_anon", "0"}]; want != got {
|
if want, got := 707915776.0, memInfo[5].value; want != got {
|
||||||
t.Errorf("want memory Active(anon) %f, got %f", want, got)
|
t.Errorf("want memory Active(anon) value %f, got %f", want, got)
|
||||||
}
|
}
|
||||||
|
|
||||||
if want, got := 150994944.0, memInfo[meminfoKey{"AnonHugePages", "0"}]; want != got {
|
if want, got := "Active_anon", memInfo[5].metricName; want != got {
|
||||||
|
t.Errorf("want metric Active(anon) metricName %s, got %s", want, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
if want, got := 150994944.0, memInfo[25].value; want != got {
|
||||||
t.Errorf("want memory AnonHugePages %f, got %f", want, got)
|
t.Errorf("want memory AnonHugePages %f, got %f", want, got)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,11 +53,55 @@ func TestMemInfoNuma(t *testing.T) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if want, got := 291930112.0, memInfo[meminfoKey{"Inactive_anon", "1"}]; want != got {
|
if want, got := 291930112.0, memInfo[6].value; want != got {
|
||||||
t.Errorf("want memory Inactive(anon) %f, got %f", want, got)
|
t.Errorf("want memory Inactive(anon) %f, got %f", want, got)
|
||||||
}
|
}
|
||||||
|
|
||||||
if want, got := 85585088512.0, memInfo[meminfoKey{"FilePages", "1"}]; want != got {
|
if want, got := 85585088512.0, memInfo[13].value; want != got {
|
||||||
t.Errorf("want memory FilePages %f, got %f", want, got)
|
t.Errorf("want memory FilePages %f, got %f", want, got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMemInfoNumaStat(t *testing.T) {
|
||||||
|
file, err := os.Open("fixtures/sys/devices/system/node/node0/numastat")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
|
numaStat, err := parseMemInfoNumaStat(file, "0")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if want, got := 193460335812.0, numaStat[0].value; want != got {
|
||||||
|
t.Errorf("want numa stat numa_hit value %f, got %f", want, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
if want, got := "numa_hit_total", numaStat[0].metricName; want != got {
|
||||||
|
t.Errorf("want numa stat numa_hit metricName %s, got %s", want, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
if want, got := 193454780853.0, numaStat[4].value; want != got {
|
||||||
|
t.Errorf("want numa stat local_node %f, got %f", want, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
file, err = os.Open("fixtures/sys/devices/system/node/node1/numastat")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
|
numaStat, err = parseMemInfoNumaStat(file, "1")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if want, got := 59858626709.0, numaStat[1].value; want != got {
|
||||||
|
t.Errorf("want numa stat numa_miss %f, got %f", want, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
if want, got := 59860526920.0, numaStat[5].value; want != got {
|
||||||
|
t.Errorf("want numa stat other_node %f, got %f", want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue