mirror of
				https://github.com/prometheus/node_exporter.git
				synced 2025-08-20 18:33:52 -07:00 
			
		
		
		
	Add collector for metrics of Linux software RAIDs
This commit is contained in:
		
							parent
							
								
									d8c99d8728
								
							
						
					
					
						commit
						9f2aa24e12
					
				|  | @ -34,6 +34,7 @@ netstat | Exposes network statistics from `/proc/net/netstat`. This is the same | |||
| stat | Exposes various statistics from `/proc/stat`. This includes CPU usage, boot time, forks and interrupts. | ||||
| textfile | Exposes statistics read from local disk. The `--collector.textfile.directory` flag must be set. | ||||
| time | Exposes the current system time. | ||||
| mdadm | Exposes statistics about devices in `/proc/mdstat` (does nothing if `/proc/mdstat` is not present). | ||||
| 
 | ||||
| 
 | ||||
| ### Disabled by default | ||||
|  |  | |||
							
								
								
									
										26
									
								
								collector/fixtures/mdstat
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								collector/fixtures/mdstat
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | |||
| Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5] [raid4] [raid10] | ||||
| md3 : active raid6 sda1[8] sdh1[7] sdg1[6] sdf1[5] sde1[11] sdd1[3] sdc1[10] sdb1[9] | ||||
|       5853468288 blocks super 1.2 level 6, 64k chunk, algorithm 2 [8/8] [UUUUUUUU] | ||||
|        | ||||
| md127 : active raid1 sdi2[0] sdj2[1] | ||||
|       312319552 blocks [2/2] [UU] | ||||
|        | ||||
| md0 : active raid1 sdi1[0] sdj1[1] | ||||
|       248896 blocks [2/2] [UU] | ||||
|        | ||||
| md4 : inactive raid1 sda3[0] sdb3[1] | ||||
|       4883648 blocks [2/2] [UU] | ||||
| 
 | ||||
| md6 : active raid1 sdb2[2] sda2[0] | ||||
|       195310144 blocks [2/1] [U_] | ||||
|       [=>...................]  recovery =  8.5% (16775552/195310144) finish=17.0min speed=259783K/sec | ||||
| 
 | ||||
| md8 : active raid1 sdb1[1] sda1[0] | ||||
|       195310144 blocks [2/2] [UU] | ||||
|       [=>...................]  resync =  8.5% (16775552/195310144) finish=17.0min speed=259783K/sec | ||||
| 
 | ||||
| md7 : active raid6 sdb1[0] sde1[3] sdd1[2] sdc1[1] | ||||
|       7813735424 blocks super 1.2 level 6, 512k chunk, algorithm 2 [4/3] [U_UU] | ||||
|       bitmap: 0/30 pages [0KB], 65536KB chunk | ||||
| 
 | ||||
| unused devices: <none> | ||||
							
								
								
									
										279
									
								
								collector/mdadm.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										279
									
								
								collector/mdadm.go
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,279 @@ | |||
| // +build !nomdadm
 | ||||
| 
 | ||||
| package collector | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"regexp" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/prometheus/client_golang/prometheus" | ||||
| 	"github.com/prometheus/log" | ||||
| ) | ||||
| 
 | ||||
var (
	// statusfile is the path of the mdstat file read by the collector.
	statusfile = "/proc/mdstat"

	// statuslineRE matches the status line of an md-device entry. It captures,
	// in order, the number of blocks and the two numbers of the "[n/m]" pair
	// that precedes the "[UU_...]" disk map.
	statuslineRE = regexp.MustCompile(`(\d+) blocks .*\[(\d+)/(\d+)\] \[[U_]+\]`)

	// buildlineRE matches the "(done/total)" pair of a recovery/resync line
	// and captures the first number of that pair.
	buildlineRE = regexp.MustCompile(`\((\d+)/\d+\)`)
)
| 
 | ||||
// mdStatus holds the values parsed for a single md-device.
//
// The field order matters: the struct is also built with positional literals.
type mdStatus struct {
	mdName       string // name of the md-device, e.g. "md0"
	isActive     bool   // true if the device line reports the state "active"
	disksActive  int64  // number of active disks
	disksTotal   int64  // total number of disks
	blocksTotal  int64  // size of the device in blocks
	blocksSynced int64  // blocks already synced; equals blocksTotal when no recovery/resync is running
}
| 
 | ||||
// mdadmCollector exports metrics about md-devices. It carries no state: all
// values are read from statusfile on every Update.
type mdadmCollector struct{}
| 
 | ||||
| func init() { | ||||
| 	Factories["mdadm"] = NewMdadmCollector | ||||
| } | ||||
| 
 | ||||
| func evalStatusline(statusline string) (active, total, size int64, err error) { | ||||
| 	matches := statuslineRE.FindStringSubmatch(statusline) | ||||
| 
 | ||||
| 	// +1 to make it more obvious that the whole string containing the info is also returned as matches[0].
 | ||||
| 	if len(matches) < 3+1 { | ||||
| 		return 0, 0, 0, fmt.Errorf("too few matches found in statusline: %s", statusline) | ||||
| 	} else { | ||||
| 		if len(matches) > 3+1 { | ||||
| 			return 0, 0, 0, fmt.Errorf("too many matches found in statusline: %s", statusline) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	size, err = strconv.ParseInt(matches[1], 10, 64) | ||||
| 	if err != nil { | ||||
| 		return 0, 0, 0, fmt.Errorf("%s in statusline: %s", err, statusline) | ||||
| 	} | ||||
| 
 | ||||
| 	total, err = strconv.ParseInt(matches[2], 10, 64) | ||||
| 	if err != nil { | ||||
| 		return 0, 0, 0, fmt.Errorf("%s in statusline: %s", err, statusline) | ||||
| 	} | ||||
| 	active, err = strconv.ParseInt(matches[3], 10, 64) | ||||
| 	if err != nil { | ||||
| 		return 0, 0, 0, fmt.Errorf("%s in statusline: %s", err, statusline) | ||||
| 	} | ||||
| 
 | ||||
| 	return active, total, size, nil | ||||
| } | ||||
| 
 | ||||
| // Gets the size that has already been synced out of the sync-line.
 | ||||
| func evalBuildline(buildline string) (int64, error) { | ||||
| 	matches := buildlineRE.FindStringSubmatch(buildline) | ||||
| 
 | ||||
| 	// +1 to make it more obvious that the whole string containing the info is also returned as matches[0].
 | ||||
| 	if len(matches) < 1+1 { | ||||
| 		return 0, fmt.Errorf("too few matches found in buildline: %s", buildline) | ||||
| 	} | ||||
| 
 | ||||
| 	if len(matches) > 1+1 { | ||||
| 		return 0, fmt.Errorf("too many matches found in buildline: %s", buildline) | ||||
| 	} | ||||
| 
 | ||||
| 	syncedSize, err := strconv.ParseInt(matches[1], 10, 64) | ||||
| 
 | ||||
| 	if err != nil { | ||||
| 		return 0, fmt.Errorf("%s in buildline: %s", err, buildline) | ||||
| 	} | ||||
| 
 | ||||
| 	return syncedSize, nil | ||||
| } | ||||
| 
 | ||||
| // Parses an mdstat-file and returns a struct with the relevant infos.
 | ||||
| func parseMdstat(mdStatusFilePath string) ([]mdStatus, error) { | ||||
| 	content, err := ioutil.ReadFile(mdStatusFilePath) | ||||
| 	if err != nil { | ||||
| 		return []mdStatus{}, fmt.Errorf("error parsing %s: %s", statusfile, err) | ||||
| 	} | ||||
| 
 | ||||
| 	mdStatusFile := string(content) | ||||
| 
 | ||||
| 	lines := strings.Split(mdStatusFile, "\n") | ||||
| 	var currentMD string | ||||
| 
 | ||||
| 	// Each md has at least the deviceline, statusline and one empty line afterwards
 | ||||
| 	// so we will have probably something of the order len(lines)/3 devices
 | ||||
| 	// so we use that for preallocation.
 | ||||
| 	estimateMDs := len(lines) / 3 | ||||
| 	mdStates := make([]mdStatus, 0, estimateMDs) | ||||
| 
 | ||||
| 	for i, l := range lines { | ||||
| 		if l == "" { | ||||
| 			// Skip entirely empty lines.
 | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		if l[0] == ' ' { | ||||
| 			// Those lines are not the beginning of a md-section.
 | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		if strings.HasPrefix(l, "Personalities") || strings.HasPrefix(l, "unused") { | ||||
| 			// We aren't interested in lines with general info.
 | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		mainLine := strings.Split(l, " ") | ||||
| 		if len(mainLine) < 3 { | ||||
| 			return mdStates, fmt.Errorf("error parsing mdline: %s", l) | ||||
| 		} | ||||
| 		currentMD = mainLine[0]               // name of md-device
 | ||||
| 		isActive := (mainLine[2] == "active") // activity status of said md-device
 | ||||
| 
 | ||||
| 		if len(lines) <= i+3 { | ||||
| 			return mdStates, fmt.Errorf("error parsing %s: entry for %s has fewer lines than expected", statusfile, currentMD) | ||||
| 		} | ||||
| 
 | ||||
| 		active, total, size, err := evalStatusline(lines[i+1]) // parse statusline, always present
 | ||||
| 
 | ||||
| 		if err != nil { | ||||
| 			return mdStates, fmt.Errorf("error parsing %s: %s", statusfile, err) | ||||
| 		} | ||||
| 
 | ||||
| 		// Now get the number of synced blocks.
 | ||||
| 		var syncedBlocks int64 | ||||
| 
 | ||||
| 		// Get the line number of the syncing-line.
 | ||||
| 		var j int | ||||
| 		if strings.Contains(lines[i+2], "bitmap") { // then skip the bitmap line
 | ||||
| 			j = i + 3 | ||||
| 		} else { | ||||
| 			j = i + 2 | ||||
| 		} | ||||
| 
 | ||||
| 		// If device is syncing at the moment, get the number of currently synced bytes,
 | ||||
| 		// otherwise that number equals the size of the device.
 | ||||
| 		if strings.Contains(lines[j], "recovery") || strings.Contains(lines[j], "resync") { | ||||
| 			syncedBlocks, err = evalBuildline(lines[j]) | ||||
| 			if err != nil { | ||||
| 				return mdStates, fmt.Errorf("error parsing %s: %s", statusfile, err) | ||||
| 			} | ||||
| 		} else { | ||||
| 			syncedBlocks = size | ||||
| 		} | ||||
| 
 | ||||
| 		mdStates = append(mdStates, mdStatus{currentMD, isActive, active, total, size, syncedBlocks}) | ||||
| 
 | ||||
| 	} | ||||
| 
 | ||||
| 	return mdStates, nil | ||||
| } | ||||
| 
 | ||||
| // Just returns the pointer to an empty struct as we only use throwaway-metrics.
 | ||||
| func NewMdadmCollector() (Collector, error) { | ||||
| 	return &mdadmCollector{}, nil | ||||
| } | ||||
| 
 | ||||
| var ( | ||||
| 	isActiveDesc = prometheus.NewDesc( | ||||
| 		prometheus.BuildFQName(Namespace, "md", "is_active"), | ||||
| 		"Indicator whether the md-device is active or not.", | ||||
| 		[]string{"device"}, | ||||
| 		nil, | ||||
| 	) | ||||
| 
 | ||||
| 	disksActiveDesc = prometheus.NewDesc( | ||||
| 		prometheus.BuildFQName(Namespace, "md", "disks_active"), | ||||
| 		"Number of active disks of device.", | ||||
| 		[]string{"device"}, | ||||
| 		nil, | ||||
| 	) | ||||
| 
 | ||||
| 	disksTotalDesc = prometheus.NewDesc( | ||||
| 		prometheus.BuildFQName(Namespace, "md", "disks"), | ||||
| 		"Total number of disks of device.", | ||||
| 		[]string{"device"}, | ||||
| 		nil, | ||||
| 	) | ||||
| 
 | ||||
| 	blocksTotalDesc = prometheus.NewDesc( | ||||
| 		prometheus.BuildFQName(Namespace, "md", "blocks"), | ||||
| 		"Total number of blocks on device.", | ||||
| 		[]string{"device"}, | ||||
| 		nil, | ||||
| 	) | ||||
| 
 | ||||
| 	blocksSyncedDesc = prometheus.NewDesc( | ||||
| 		prometheus.BuildFQName(Namespace, "md", "blocks_synced"), | ||||
| 		"Number of blocks synced on device.", | ||||
| 		[]string{"device"}, | ||||
| 		nil, | ||||
| 	) | ||||
| ) | ||||
| 
 | ||||
| func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) (err error) { | ||||
| 	// take care we don't crash on non-existent statusfiles
 | ||||
| 	_, err = os.Stat(statusfile) | ||||
| 	if os.IsNotExist(err) { | ||||
| 		// no such file or directory, nothing to do, just return
 | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	if err != nil { // now things get weird, better to return
 | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	// First parse mdstat-file...
 | ||||
| 	mdstate, err := parseMdstat(statusfile) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("error parsing %s: %s", statusfile, err) | ||||
| 	} | ||||
| 
 | ||||
| 	// ... and then plug the result into the metrics to be exported.
 | ||||
| 	var isActiveFloat float64 | ||||
| 	for _, mds := range mdstate { | ||||
| 
 | ||||
| 		log.Debugf("collecting metrics for device %s", mds.mdName) | ||||
| 
 | ||||
| 		if mds.isActive { | ||||
| 			isActiveFloat = 1 | ||||
| 		} else { | ||||
| 			isActiveFloat = 0 | ||||
| 		} | ||||
| 
 | ||||
| 		ch <- prometheus.MustNewConstMetric( | ||||
| 			isActiveDesc, | ||||
| 			prometheus.GaugeValue, | ||||
| 			isActiveFloat, | ||||
| 			mds.mdName, | ||||
| 		) | ||||
| 
 | ||||
| 		ch <- prometheus.MustNewConstMetric( | ||||
| 			disksActiveDesc, | ||||
| 			prometheus.GaugeValue, | ||||
| 			float64(mds.disksActive), | ||||
| 			mds.mdName, | ||||
| 		) | ||||
| 
 | ||||
| 		ch <- prometheus.MustNewConstMetric( | ||||
| 			disksTotalDesc, | ||||
| 			prometheus.GaugeValue, | ||||
| 			float64(mds.disksTotal), | ||||
| 			mds.mdName, | ||||
| 		) | ||||
| 
 | ||||
| 		ch <- prometheus.MustNewConstMetric( | ||||
| 			blocksTotalDesc, | ||||
| 			prometheus.GaugeValue, | ||||
| 			float64(mds.blocksTotal), | ||||
| 			mds.mdName, | ||||
| 		) | ||||
| 
 | ||||
| 		ch <- prometheus.MustNewConstMetric( | ||||
| 			blocksSyncedDesc, | ||||
| 			prometheus.GaugeValue, | ||||
| 			float64(mds.blocksSynced), | ||||
| 			mds.mdName, | ||||
| 		) | ||||
| 
 | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
							
								
								
									
										33
									
								
								collector/mdadm_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								collector/mdadm_test.go
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,33 @@ | |||
| package collector | ||||
| 
 | ||||
| import ( | ||||
| 	"testing" | ||||
| ) | ||||
| 
 | ||||
| func TestMdadm(t *testing.T) { | ||||
| 	mdStates, err := parseMdstat("fixtures/mdstat") | ||||
| 
 | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("parsing of reference-file failed entirely: %s", err) | ||||
| 	} | ||||
| 
 | ||||
| 	refs := map[string]mdStatus{ | ||||
| 		"md3":   mdStatus{"md3", true, 8, 8, 5853468288, 5853468288}, | ||||
| 		"md127": mdStatus{"md127", true, 2, 2, 312319552, 312319552}, | ||||
| 		"md0":   mdStatus{"md0", true, 2, 2, 248896, 248896}, | ||||
| 		"md4":   mdStatus{"md4", false, 2, 2, 4883648, 4883648}, | ||||
| 		"md6":   mdStatus{"md6", true, 1, 2, 195310144, 16775552}, | ||||
| 		"md8":   mdStatus{"md8", true, 2, 2, 195310144, 16775552}, | ||||
| 		"md7":   mdStatus{"md7", true, 3, 4, 7813735424, 7813735424}, | ||||
| 	} | ||||
| 
 | ||||
| 	for _, md := range mdStates { | ||||
| 		if md != refs[md.mdName] { | ||||
| 			t.Errorf("failed parsing md-device %s correctly: want %v, got %v", md.mdName, refs[md.mdName], md) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if len(mdStates) != len(refs) { | ||||
| 		t.Errorf("expected number of parsed md-device to be %s, but was %s", len(refs), len(mdStates)) | ||||
| 	} | ||||
| } | ||||
|  | @ -28,7 +28,7 @@ var ( | |||
| 	memProfile        = flag.String("debug.memprofile-file", "", "Write memory profile to this file upon receipt of SIGUSR1.") | ||||
| 	listenAddress     = flag.String("web.listen-address", ":9100", "Address on which to expose metrics and web interface.") | ||||
| 	metricsPath       = flag.String("web.telemetry-path", "/metrics", "Path under which to expose metrics.") | ||||
| 	enabledCollectors = flag.String("collectors.enabled", "diskstats,filefd,filesystem,loadavg,meminfo,netdev,netstat,sockstat,stat,textfile,time,uname", "Comma-separated list of collectors to use.") | ||||
| 	enabledCollectors = flag.String("collectors.enabled", "diskstats,filefd,filesystem,loadavg,mdadm,meminfo,netdev,netstat,sockstat,stat,textfile,time,uname", "Comma-separated list of collectors to use.") | ||||
| 	printCollectors   = flag.Bool("collectors.print", false, "If true, print available collectors and exit.") | ||||
| 	authUser          = flag.String("auth.user", "", "Username for basic auth.") | ||||
| 	authPass          = flag.String("auth.pass", "", "Password for basic auth.") | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue