filesystem: fix mountTimeout not working issue (#2903)

Signed-off-by: DongWei <jiangxuege@hotmail.com>
This commit is contained in:
DongWei 2024-02-14 22:36:16 +08:00 committed by GitHub
parent 6d18ce7bca
commit 9f1f791ac2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -122,16 +122,8 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta
buf := new(unix.Statfs_t) buf := new(unix.Statfs_t)
err := unix.Statfs(rootfsFilePath(labels.mountPoint), buf) err := unix.Statfs(rootfsFilePath(labels.mountPoint), buf)
stuckMountsMtx.Lock()
close(success) close(success)
// If the mount has been marked as stuck, unmark it and log its recovery.
if _, ok := stuckMounts[labels.mountPoint]; ok {
level.Debug(c.logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", labels.mountPoint)
delete(stuckMounts, labels.mountPoint)
}
stuckMountsMtx.Unlock()
if err != nil { if err != nil {
level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err)
return filesystemStats{ return filesystemStats{
@ -161,17 +153,29 @@ func stuckMountWatcher(mountPoint string, success chan struct{}, logger log.Logg
select { select {
case <-success: case <-success:
// Success // Success
// If the mount has been marked as stuck, unmark it and log its recovery.
stuckMountsMtx.Lock()
defer stuckMountsMtx.Unlock()
if _, ok := stuckMounts[mountPoint]; ok {
level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint)
delete(stuckMounts, mountPoint)
}
case <-mountCheckTimer.C: case <-mountCheckTimer.C:
// Timed out, mark mount as stuck // Timed out, mark mount as stuck
stuckMountsMtx.Lock() stuckMountsMtx.Lock()
defer stuckMountsMtx.Unlock()
select { select {
case <-success: case <-success:
// Success came in just after the timeout was reached, don't label the mount as stuck // Success came in just after the timeout was reached, don't label the mount as stuck
// If the mount has been marked as stuck, unmark it and log its recovery.
if _, ok := stuckMounts[mountPoint]; ok {
level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint)
delete(stuckMounts, mountPoint)
}
default: default:
level.Debug(logger).Log("msg", "Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint) level.Debug(logger).Log("msg", "Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint)
stuckMounts[mountPoint] = struct{}{} stuckMounts[mountPoint] = struct{}{}
} }
stuckMountsMtx.Unlock()
} }
} }