mirror of
				https://github.com/prometheus/node_exporter.git
				synced 2025-08-20 18:33:52 -07:00 
			
		
		
		
	Add Linux NUMA "numastat" metrics (#249)
* Add Linux NUMA "numastat" metrics
  Read the `numastat` metrics from /sys/devices/system/node/node* when reading NUMA meminfo metrics.
* Update end-to-end test output.
* Add `numastat` metrics as counters.
* Add tests for error conditions.
* Refactor meminfo numa metrics struct
* Refactor meminfoKey into a simple struct of metric data. This makes it easier to pass slices of metrics around.
* Refactor tests.
* Fixup: Add suggested fixes.
* Fixup: More fixes
* Add another scanner.Err() return
* Add "_total" to counter metrics.
This commit is contained in:
		
							parent
							
								
									081ecc5db0
								
							
						
					
					
						commit
						c6162312f2
					
				|  | @ -1011,6 +1011,30 @@ node_memory_numa_Writeback{node="1"} 0 | |||
| # TYPE node_memory_numa_WritebackTmp gauge | ||||
| node_memory_numa_WritebackTmp{node="0"} 0 | ||||
| node_memory_numa_WritebackTmp{node="1"} 0 | ||||
| # HELP node_memory_numa_interleave_hit_total Memory information field interleave_hit_total. | ||||
| # TYPE node_memory_numa_interleave_hit_total counter | ||||
| node_memory_numa_interleave_hit_total{node="0"} 57146 | ||||
| node_memory_numa_interleave_hit_total{node="1"} 57286 | ||||
| # HELP node_memory_numa_local_node_total Memory information field local_node_total. | ||||
| # TYPE node_memory_numa_local_node_total counter | ||||
| node_memory_numa_local_node_total{node="0"} 1.93454780853e+11 | ||||
| node_memory_numa_local_node_total{node="1"} 3.2671904655e+11 | ||||
| # HELP node_memory_numa_numa_foreign_total Memory information field numa_foreign_total. | ||||
| # TYPE node_memory_numa_numa_foreign_total counter | ||||
| node_memory_numa_numa_foreign_total{node="0"} 5.98586233e+10 | ||||
| node_memory_numa_numa_foreign_total{node="1"} 1.2624528e+07 | ||||
| # HELP node_memory_numa_numa_hit_total Memory information field numa_hit_total. | ||||
| # TYPE node_memory_numa_numa_hit_total counter | ||||
| node_memory_numa_numa_hit_total{node="0"} 1.93460335812e+11 | ||||
| node_memory_numa_numa_hit_total{node="1"} 3.26720946761e+11 | ||||
| # HELP node_memory_numa_numa_miss_total Memory information field numa_miss_total. | ||||
| # TYPE node_memory_numa_numa_miss_total counter | ||||
| node_memory_numa_numa_miss_total{node="0"} 1.2624528e+07 | ||||
| node_memory_numa_numa_miss_total{node="1"} 5.9858626709e+10 | ||||
| # HELP node_memory_numa_other_node_total Memory information field other_node_total. | ||||
| # TYPE node_memory_numa_other_node_total counter | ||||
| node_memory_numa_other_node_total{node="0"} 1.8179487e+07 | ||||
| node_memory_numa_other_node_total{node="1"} 5.986052692e+10 | ||||
| # HELP node_net_bonding_slaves Number of configured slaves per bonding interface. | ||||
| # TYPE node_net_bonding_slaves gauge | ||||
| node_net_bonding_slaves{master="bond0"} 0 | ||||
|  |  | |||
|  | @ -0,0 +1,6 @@ | |||
| numa_hit 193460335812 | ||||
| numa_miss 12624528 | ||||
| numa_foreign 59858623300 | ||||
| interleave_hit 57146 | ||||
| local_node 193454780853 | ||||
| other_node 18179487 | ||||
|  | @ -0,0 +1,6 @@ | |||
| numa_hit 326720946761 | ||||
| numa_miss 59858626709 | ||||
| numa_foreign 12624528 | ||||
| interleave_hit 57286 | ||||
| local_node 326719046550 | ||||
| other_node 59860526920 | ||||
|  | @ -33,8 +33,13 @@ const ( | |||
| 	memInfoNumaSubsystem = "memory_numa" | ||||
| ) | ||||
| 
 | ||||
| type meminfoKey struct { | ||||
| 	metricName, numaNode string | ||||
| var meminfoNodeRE = regexp.MustCompile(`.*devices/system/node/node([0-9]*)`) | ||||
| 
 | ||||
| type meminfoMetric struct { | ||||
| 	metricName string | ||||
| 	metricType prometheus.ValueType | ||||
| 	numaNode   string | ||||
| 	value      float64 | ||||
| } | ||||
| 
 | ||||
| type meminfoNumaCollector struct { | ||||
|  | @ -54,53 +59,70 @@ func NewMeminfoNumaCollector() (Collector, error) { | |||
| } | ||||
| 
 | ||||
| func (c *meminfoNumaCollector) Update(ch chan<- prometheus.Metric) (err error) { | ||||
| 	memInfoNuma, err := getMemInfoNuma() | ||||
| 	metrics, err := getMemInfoNuma() | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("couldn't get NUMA meminfo: %s", err) | ||||
| 	} | ||||
| 	for k, v := range memInfoNuma { | ||||
| 		desc, ok := c.metricDescs[k.metricName] | ||||
| 	for _, v := range metrics { | ||||
| 		desc, ok := c.metricDescs[v.metricName] | ||||
| 		if !ok { | ||||
| 			desc = prometheus.NewDesc( | ||||
| 				prometheus.BuildFQName(Namespace, memInfoNumaSubsystem, k.metricName), | ||||
| 				fmt.Sprintf("Memory information field %s.", k.metricName), | ||||
| 				prometheus.BuildFQName(Namespace, memInfoNumaSubsystem, v.metricName), | ||||
| 				fmt.Sprintf("Memory information field %s.", v.metricName), | ||||
| 				[]string{"node"}, nil) | ||||
| 			c.metricDescs[k.metricName] = desc | ||||
| 			c.metricDescs[v.metricName] = desc | ||||
| 		} | ||||
| 		ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, v, k.numaNode) | ||||
| 		ch <- prometheus.MustNewConstMetric(desc, v.metricType, v.value, v.numaNode) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func getMemInfoNuma() (map[meminfoKey]float64, error) { | ||||
| 	info := make(map[meminfoKey]float64) | ||||
| func getMemInfoNuma() ([]meminfoMetric, error) { | ||||
| 	var ( | ||||
| 		metrics []meminfoMetric | ||||
| 	) | ||||
| 
 | ||||
| 	nodes, err := filepath.Glob(sysFilePath("devices/system/node/node[0-9]*")) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	for _, node := range nodes { | ||||
| 		file, err := os.Open(path.Join(node, "meminfo")) | ||||
| 		meminfoFile, err := os.Open(path.Join(node, "meminfo")) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		defer file.Close() | ||||
| 		defer meminfoFile.Close() | ||||
| 
 | ||||
| 		numaInfo, err := parseMemInfoNuma(file) | ||||
| 		numaInfo, err := parseMemInfoNuma(meminfoFile) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		for k, v := range numaInfo { | ||||
| 			info[k] = v | ||||
| 		metrics = append(metrics, numaInfo...) | ||||
| 
 | ||||
| 		numastatFile, err := os.Open(path.Join(node, "numastat")) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		defer numastatFile.Close() | ||||
| 
 | ||||
| 		nodeNumber := meminfoNodeRE.FindStringSubmatch(node) | ||||
| 		if nodeNumber == nil { | ||||
| 			return nil, fmt.Errorf("device node string didn't match regexp: %s", node) | ||||
| 		} | ||||
| 
 | ||||
| 		numaStat, err := parseMemInfoNumaStat(numastatFile, nodeNumber[1]) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		metrics = append(metrics, numaStat...) | ||||
| 	} | ||||
| 
 | ||||
| 	return info, nil | ||||
| 	return metrics, nil | ||||
| } | ||||
| 
 | ||||
| func parseMemInfoNuma(r io.Reader) (map[meminfoKey]float64, error) { | ||||
| func parseMemInfoNuma(r io.Reader) ([]meminfoMetric, error) { | ||||
| 	var ( | ||||
| 		memInfo = map[meminfoKey]float64{} | ||||
| 		memInfo []meminfoMetric | ||||
| 		scanner = bufio.NewScanner(r) | ||||
| 		re      = regexp.MustCompile("\\((.*)\\)") | ||||
| 	) | ||||
|  | @ -127,8 +149,34 @@ func parseMemInfoNuma(r io.Reader) (map[meminfoKey]float64, error) { | |||
| 
 | ||||
| 		// Active(anon) -> Active_anon
 | ||||
| 		metric = re.ReplaceAllString(metric, "_${1}") | ||||
| 		memInfo[meminfoKey{metric, parts[1]}] = fv | ||||
| 		memInfo = append(memInfo, meminfoMetric{metric, prometheus.GaugeValue, parts[1], fv}) | ||||
| 	} | ||||
| 
 | ||||
| 	return memInfo, nil | ||||
| 	return memInfo, scanner.Err() | ||||
| } | ||||
| 
 | ||||
| func parseMemInfoNumaStat(r io.Reader, nodeNumber string) ([]meminfoMetric, error) { | ||||
| 	var ( | ||||
| 		numaStat []meminfoMetric | ||||
| 		scanner  = bufio.NewScanner(r) | ||||
| 	) | ||||
| 
 | ||||
| 	for scanner.Scan() { | ||||
| 		line := strings.TrimSpace(scanner.Text()) | ||||
| 		if line == "" { | ||||
| 			continue | ||||
| 		} | ||||
| 		parts := strings.Fields(string(line)) | ||||
| 		if len(parts) != 2 { | ||||
| 			return nil, fmt.Errorf("line scan did not return 2 fields: %s", line) | ||||
| 		} | ||||
| 
 | ||||
| 		fv, err := strconv.ParseFloat(parts[1], 64) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("invalid value in numastat: %s", err) | ||||
| 		} | ||||
| 
 | ||||
| 		numaStat = append(numaStat, meminfoMetric{parts[0] + "_total", prometheus.CounterValue, nodeNumber, fv}) | ||||
| 	} | ||||
| 	return numaStat, scanner.Err() | ||||
| } | ||||
|  |  | |||
|  | @ -30,11 +30,15 @@ func TestMemInfoNuma(t *testing.T) { | |||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	if want, got := 707915776.0, memInfo[meminfoKey{"Active_anon", "0"}]; want != got { | ||||
| 		t.Errorf("want memory Active(anon) %f, got %f", want, got) | ||||
| 	if want, got := 707915776.0, memInfo[5].value; want != got { | ||||
| 		t.Errorf("want memory Active(anon) value %f, got %f", want, got) | ||||
| 	} | ||||
| 
 | ||||
| 	if want, got := 150994944.0, memInfo[meminfoKey{"AnonHugePages", "0"}]; want != got { | ||||
| 	if want, got := "Active_anon", memInfo[5].metricName; want != got { | ||||
| 		t.Errorf("want metric Active(anon) metricName %s, got %s", want, got) | ||||
| 	} | ||||
| 
 | ||||
| 	if want, got := 150994944.0, memInfo[25].value; want != got { | ||||
| 		t.Errorf("want memory AnonHugePages %f, got %f", want, got) | ||||
| 	} | ||||
| 
 | ||||
|  | @ -49,11 +53,55 @@ func TestMemInfoNuma(t *testing.T) { | |||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	if want, got := 291930112.0, memInfo[meminfoKey{"Inactive_anon", "1"}]; want != got { | ||||
| 	if want, got := 291930112.0, memInfo[6].value; want != got { | ||||
| 		t.Errorf("want memory Inactive(anon) %f, got %f", want, got) | ||||
| 	} | ||||
| 
 | ||||
| 	if want, got := 85585088512.0, memInfo[meminfoKey{"FilePages", "1"}]; want != got { | ||||
| 	if want, got := 85585088512.0, memInfo[13].value; want != got { | ||||
| 		t.Errorf("want memory FilePages %f, got %f", want, got) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestMemInfoNumaStat(t *testing.T) { | ||||
| 	file, err := os.Open("fixtures/sys/devices/system/node/node0/numastat") | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 	defer file.Close() | ||||
| 
 | ||||
| 	numaStat, err := parseMemInfoNumaStat(file, "0") | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	if want, got := 193460335812.0, numaStat[0].value; want != got { | ||||
| 		t.Errorf("want numa stat numa_hit value %f, got %f", want, got) | ||||
| 	} | ||||
| 
 | ||||
| 	if want, got := "numa_hit_total", numaStat[0].metricName; want != got { | ||||
| 		t.Errorf("want numa stat numa_hit metricName %s, got %s", want, got) | ||||
| 	} | ||||
| 
 | ||||
| 	if want, got := 193454780853.0, numaStat[4].value; want != got { | ||||
| 		t.Errorf("want numa stat local_node %f, got %f", want, got) | ||||
| 	} | ||||
| 
 | ||||
| 	file, err = os.Open("fixtures/sys/devices/system/node/node1/numastat") | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 	defer file.Close() | ||||
| 
 | ||||
| 	numaStat, err = parseMemInfoNumaStat(file, "1") | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 
 | ||||
| 	if want, got := 59858626709.0, numaStat[1].value; want != got { | ||||
| 		t.Errorf("want numa stat numa_miss %f, got %f", want, got) | ||||
| 	} | ||||
| 
 | ||||
| 	if want, got := 59860526920.0, numaStat[5].value; want != got { | ||||
| 		t.Errorf("want numa stat other_node %f, got %f", want, got) | ||||
| 	} | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue