| 
									
										
										
										
											2015-09-26 08:36:40 -07:00
										 |  |  | // Copyright 2015 The Prometheus Authors
 | 
					
						
							|  |  |  | // Licensed under the Apache License, Version 2.0 (the "License");
 | 
					
						
							|  |  |  | // you may not use this file except in compliance with the License.
 | 
					
						
							|  |  |  | // You may obtain a copy of the License at
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // http://www.apache.org/licenses/LICENSE-2.0
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // Unless required by applicable law or agreed to in writing, software
 | 
					
						
							|  |  |  | // distributed under the License is distributed on an "AS IS" BASIS,
 | 
					
						
							|  |  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
					
						
							|  |  |  | // See the License for the specific language governing permissions and
 | 
					
						
							|  |  |  | // limitations under the License.
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-03 04:35:24 -07:00
										 |  |  | //go:build !nofilesystem
 | 
					
						
							| 
									
										
										
										
											2015-05-12 04:06:41 -07:00
										 |  |  | // +build !nofilesystem
 | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | package collector | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"bufio" | 
					
						
							| 
									
										
										
										
											2020-06-15 13:27:14 -07:00
										 |  |  | 	"errors" | 
					
						
							| 
									
										
										
										
											2018-10-30 14:12:42 -07:00
										 |  |  | 	"fmt" | 
					
						
							|  |  |  | 	"io" | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 	"log/slog" | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | 	"os" | 
					
						
							|  |  |  | 	"strings" | 
					
						
							| 
									
										
										
										
											2018-07-14 02:10:28 -07:00
										 |  |  | 	"sync" | 
					
						
							|  |  |  | 	"time" | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-03-07 00:25:05 -08:00
										 |  |  | 	"github.com/alecthomas/kingpin/v2" | 
					
						
							| 
									
										
										
										
											2019-05-10 11:04:06 -07:00
										 |  |  | 	"golang.org/x/sys/unix" | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const ( | 
					
						
							| 
									
										
										
										
											2022-01-01 05:09:20 -08:00
										 |  |  | 	defMountPointsExcluded = "^/(dev|proc|run/credentials/.+|sys|var/lib/docker/.+|var/lib/containers/storage/.+)($|/)" | 
					
						
							| 
									
										
										
										
											2021-03-23 04:00:06 -07:00
										 |  |  | 	defFSTypesExcluded     = "^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$" | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-09-13 12:13:59 -07:00
										 |  |  | var mountTimeout = kingpin.Flag("collector.filesystem.mount-timeout", | 
					
						
							|  |  |  | 	"how long to wait for a mount to respond before marking it as stale"). | 
					
						
							|  |  |  | 	Hidden().Default("5s").Duration() | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | var statWorkerCount = kingpin.Flag("collector.filesystem.stat-workers", | 
					
						
							|  |  |  | 	"how many stat calls to process simultaneously"). | 
					
						
							|  |  |  | 	Hidden().Default("4").Int() | 
					
						
							| 
									
										
										
										
											2018-07-14 02:10:28 -07:00
										 |  |  | var stuckMounts = make(map[string]struct{}) | 
					
						
							|  |  |  | var stuckMountsMtx = &sync.Mutex{} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-28 08:44:53 -08:00
										 |  |  | // GetStats returns filesystem stats.
 | 
					
						
							| 
									
										
										
										
											2017-02-28 10:47:20 -08:00
										 |  |  | func (c *filesystemCollector) GetStats() ([]filesystemStats, error) { | 
					
						
							| 
									
										
										
										
											2019-12-31 08:19:37 -08:00
										 |  |  | 	mps, err := mountPointDetails(c.logger) | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | 	if err != nil { | 
					
						
							| 
									
										
										
										
											2015-09-16 06:34:34 -07:00
										 |  |  | 		return nil, err | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2017-02-28 10:47:20 -08:00
										 |  |  | 	stats := []filesystemStats{} | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 	labelChan := make(chan filesystemLabels) | 
					
						
							|  |  |  | 	statChan := make(chan filesystemStats) | 
					
						
							|  |  |  | 	wg := sync.WaitGroup{} | 
					
						
							| 
									
										
										
										
											2018-07-14 02:10:28 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 	workerCount := *statWorkerCount | 
					
						
							|  |  |  | 	if workerCount < 1 { | 
					
						
							|  |  |  | 		workerCount = 1 | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2019-12-31 08:19:37 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 	for i := 0; i < workerCount; i++ { | 
					
						
							|  |  |  | 		wg.Add(1) | 
					
						
							|  |  |  | 		go func() { | 
					
						
							|  |  |  | 			defer wg.Done() | 
					
						
							|  |  |  | 			for labels := range labelChan { | 
					
						
							|  |  |  | 				statChan <- c.processStat(labels) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		}() | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2015-07-02 16:20:49 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 	go func() { | 
					
						
							|  |  |  | 		for _, labels := range mps { | 
					
						
							| 
									
										
										
										
											2024-11-05 01:36:13 -08:00
										 |  |  | 			if c.mountPointFilter.ignored(labels.mountPoint) { | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 				c.logger.Debug("Ignoring mount point", "mountpoint", labels.mountPoint) | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 				continue | 
					
						
							| 
									
										
										
										
											2018-07-16 06:56:27 -07:00
										 |  |  | 			} | 
					
						
							| 
									
										
										
										
											2024-11-05 01:36:13 -08:00
										 |  |  | 			if c.fsTypeFilter.ignored(labels.fsType) { | 
					
						
							|  |  |  | 				c.logger.Debug("Ignoring fs type", "type", labels.fsType) | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 				continue | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			stuckMountsMtx.Lock() | 
					
						
							|  |  |  | 			if _, ok := stuckMounts[labels.mountPoint]; ok { | 
					
						
							| 
									
										
										
										
											2024-02-18 03:04:30 -08:00
										 |  |  | 				labels.deviceError = "mountpoint timeout" | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 				stats = append(stats, filesystemStats{ | 
					
						
							|  |  |  | 					labels:      labels, | 
					
						
							|  |  |  | 					deviceError: 1, | 
					
						
							|  |  |  | 				}) | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 				c.logger.Debug("Mount point is in an unresponsive state", "mountpoint", labels.mountPoint) | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 				stuckMountsMtx.Unlock() | 
					
						
							|  |  |  | 				continue | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			stuckMountsMtx.Unlock() | 
					
						
							|  |  |  | 			labelChan <- labels | 
					
						
							| 
									
										
										
										
											2015-11-10 00:25:04 -08:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 		close(labelChan) | 
					
						
							|  |  |  | 		wg.Wait() | 
					
						
							|  |  |  | 		close(statChan) | 
					
						
							|  |  |  | 	}() | 
					
						
							| 
									
										
										
										
											2015-11-10 00:25:04 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 	for stat := range statChan { | 
					
						
							|  |  |  | 		stats = append(stats, stat) | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2015-09-16 06:34:34 -07:00
										 |  |  | 	return stats, nil | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemStats { | 
					
						
							| 
									
										
										
										
											2023-09-19 05:55:58 -07:00
										 |  |  | 	var ro float64 | 
					
						
							|  |  |  | 	for _, option := range strings.Split(labels.options, ",") { | 
					
						
							|  |  |  | 		if option == "ro" { | 
					
						
							|  |  |  | 			ro = 1 | 
					
						
							|  |  |  | 			break | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 	success := make(chan struct{}) | 
					
						
							|  |  |  | 	go stuckMountWatcher(labels.mountPoint, success, c.logger) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	buf := new(unix.Statfs_t) | 
					
						
							|  |  |  | 	err := unix.Statfs(rootfsFilePath(labels.mountPoint), buf) | 
					
						
							| 
									
										
										
										
											2024-02-20 01:31:08 -08:00
										 |  |  | 	stuckMountsMtx.Lock() | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 	close(success) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-20 01:31:08 -08:00
										 |  |  | 	// If the mount has been marked as stuck, unmark it and log it's recovery.
 | 
					
						
							|  |  |  | 	if _, ok := stuckMounts[labels.mountPoint]; ok { | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 		c.logger.Debug("Mount point has recovered, monitoring will resume", "mountpoint", labels.mountPoint) | 
					
						
							| 
									
										
										
										
											2024-02-20 01:31:08 -08:00
										 |  |  | 		delete(stuckMounts, labels.mountPoint) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	stuckMountsMtx.Unlock() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 	if err != nil { | 
					
						
							| 
									
										
										
										
											2024-02-18 03:04:30 -08:00
										 |  |  | 		labels.deviceError = err.Error() | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 		c.logger.Debug("Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 		return filesystemStats{ | 
					
						
							|  |  |  | 			labels:      labels, | 
					
						
							|  |  |  | 			deviceError: 1, | 
					
						
							| 
									
										
										
										
											2023-09-19 05:55:58 -07:00
										 |  |  | 			ro:          ro, | 
					
						
							| 
									
										
										
										
											2023-06-02 12:53:06 -07:00
										 |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return filesystemStats{ | 
					
						
							|  |  |  | 		labels:    labels, | 
					
						
							|  |  |  | 		size:      float64(buf.Blocks) * float64(buf.Bsize), | 
					
						
							|  |  |  | 		free:      float64(buf.Bfree) * float64(buf.Bsize), | 
					
						
							|  |  |  | 		avail:     float64(buf.Bavail) * float64(buf.Bsize), | 
					
						
							|  |  |  | 		files:     float64(buf.Files), | 
					
						
							|  |  |  | 		filesFree: float64(buf.Ffree), | 
					
						
							|  |  |  | 		ro:        ro, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-07-14 02:10:28 -07:00
										 |  |  | // stuckMountWatcher listens on the given success channel and if the channel closes
 | 
					
						
							|  |  |  | // then the watcher does nothing. If instead the timeout is reached, the
 | 
					
						
							|  |  |  | // mount point that is being watched is marked as stuck.
 | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | func stuckMountWatcher(mountPoint string, success chan struct{}, logger *slog.Logger) { | 
					
						
							| 
									
										
										
										
											2021-10-24 03:48:57 -07:00
										 |  |  | 	mountCheckTimer := time.NewTimer(*mountTimeout) | 
					
						
							|  |  |  | 	defer mountCheckTimer.Stop() | 
					
						
							| 
									
										
										
										
											2018-07-14 02:10:28 -07:00
										 |  |  | 	select { | 
					
						
							|  |  |  | 	case <-success: | 
					
						
							|  |  |  | 		// Success
 | 
					
						
							| 
									
										
										
										
											2021-10-24 03:48:57 -07:00
										 |  |  | 	case <-mountCheckTimer.C: | 
					
						
							| 
									
										
										
										
											2018-07-14 02:10:28 -07:00
										 |  |  | 		// Timed out, mark mount as stuck
 | 
					
						
							|  |  |  | 		stuckMountsMtx.Lock() | 
					
						
							|  |  |  | 		select { | 
					
						
							|  |  |  | 		case <-success: | 
					
						
							|  |  |  | 			// Success came in just after the timeout was reached, don't label the mount as stuck
 | 
					
						
							|  |  |  | 		default: | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 			logger.Debug("Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint) | 
					
						
							| 
									
										
										
										
											2018-07-14 02:10:28 -07:00
										 |  |  | 			stuckMounts[mountPoint] = struct{}{} | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2024-02-20 01:31:08 -08:00
										 |  |  | 		stuckMountsMtx.Unlock() | 
					
						
							| 
									
										
										
										
											2018-07-14 02:10:28 -07:00
										 |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | func mountPointDetails(logger *slog.Logger) ([]filesystemLabels, error) { | 
					
						
							| 
									
										
										
										
											2024-07-14 07:33:12 -07:00
										 |  |  | 	file, err := os.Open(procFilePath("1/mountinfo")) | 
					
						
							| 
									
										
										
										
											2020-06-15 13:27:14 -07:00
										 |  |  | 	if errors.Is(err, os.ErrNotExist) { | 
					
						
							| 
									
										
										
										
											2024-07-14 07:33:12 -07:00
										 |  |  | 		// Fallback to `/proc/self/mountinfo` if `/proc/1/mountinfo` is missing due hidepid.
 | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 		logger.Debug("Reading root mounts failed, falling back to self mounts", "err", err) | 
					
						
							| 
									
										
										
										
											2024-07-14 07:33:12 -07:00
										 |  |  | 		file, err = os.Open(procFilePath("self/mountinfo")) | 
					
						
							| 
									
										
										
										
											2018-11-30 05:01:55 -08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	defer file.Close() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-10-30 14:12:42 -07:00
										 |  |  | 	return parseFilesystemLabels(file) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func parseFilesystemLabels(r io.Reader) ([]filesystemLabels, error) { | 
					
						
							|  |  |  | 	var filesystems []filesystemLabels | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	scanner := bufio.NewScanner(r) | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | 	for scanner.Scan() { | 
					
						
							|  |  |  | 		parts := strings.Fields(scanner.Text()) | 
					
						
							| 
									
										
										
										
											2018-06-06 07:49:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-14 07:33:12 -07:00
										 |  |  | 		if len(parts) < 10 { | 
					
						
							| 
									
										
										
										
											2018-10-30 14:12:42 -07:00
										 |  |  | 			return nil, fmt.Errorf("malformed mount point information: %q", scanner.Text()) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-14 07:33:12 -07:00
										 |  |  | 		major, minor := 0, 0 | 
					
						
							|  |  |  | 		_, err := fmt.Sscanf(parts[2], "%d:%d", &major, &minor) | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return nil, fmt.Errorf("malformed mount point information: %q", scanner.Text()) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		m := 5 | 
					
						
							|  |  |  | 		for parts[m+1] != "-" { | 
					
						
							|  |  |  | 			m++ | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-06 07:49:19 -07:00
										 |  |  | 		// Ensure we handle the translation of \040 and \011
 | 
					
						
							|  |  |  | 		// as per fstab(5).
 | 
					
						
							| 
									
										
										
										
											2024-07-14 07:33:12 -07:00
										 |  |  | 		parts[4] = strings.Replace(parts[4], "\\040", " ", -1) | 
					
						
							|  |  |  | 		parts[4] = strings.Replace(parts[4], "\\011", "\t", -1) | 
					
						
							| 
									
										
										
										
											2018-06-06 07:49:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-03-22 17:48:18 -07:00
										 |  |  | 		filesystems = append(filesystems, filesystemLabels{ | 
					
						
							| 
									
										
										
										
											2024-07-14 07:33:12 -07:00
										 |  |  | 			device:      parts[m+3], | 
					
						
							|  |  |  | 			mountPoint:  rootfsStripPrefix(parts[4]), | 
					
						
							|  |  |  | 			fsType:      parts[m+2], | 
					
						
							|  |  |  | 			options:     parts[5], | 
					
						
							|  |  |  | 			major:       fmt.Sprint(major), | 
					
						
							|  |  |  | 			minor:       fmt.Sprint(minor), | 
					
						
							| 
									
										
										
										
											2024-02-18 03:04:30 -08:00
										 |  |  | 			deviceError: "", | 
					
						
							| 
									
										
										
										
											2017-03-22 17:48:18 -07:00
										 |  |  | 		}) | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2018-10-30 14:12:42 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-03 06:55:40 -08:00
										 |  |  | 	return filesystems, scanner.Err() | 
					
						
							| 
									
										
										
										
											2014-06-05 12:44:44 -07:00
										 |  |  | } |