mirror of
https://github.com/prometheus/node_exporter.git
synced 2025-01-13 15:01:16 -08:00
filesystem: fix mountTimeout not working issue (#2903)
Signed-off-by: DongWei <jiangxuege@hotmail.com>
This commit is contained in:
parent
6d18ce7bca
commit
9f1f791ac2
|
@@ -122,16 +122,8 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta
|
||||||
|
|
||||||
buf := new(unix.Statfs_t)
|
buf := new(unix.Statfs_t)
|
||||||
err := unix.Statfs(rootfsFilePath(labels.mountPoint), buf)
|
err := unix.Statfs(rootfsFilePath(labels.mountPoint), buf)
|
||||||
stuckMountsMtx.Lock()
|
|
||||||
close(success)
|
close(success)
|
||||||
|
|
||||||
// If the mount has been marked as stuck, unmark it and log its recovery.
|
|
||||||
if _, ok := stuckMounts[labels.mountPoint]; ok {
|
|
||||||
level.Debug(c.logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", labels.mountPoint)
|
|
||||||
delete(stuckMounts, labels.mountPoint)
|
|
||||||
}
|
|
||||||
stuckMountsMtx.Unlock()
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err)
|
level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err)
|
||||||
return filesystemStats{
|
return filesystemStats{
|
||||||
|
@@ -161,17 +153,29 @@ func stuckMountWatcher(mountPoint string, success chan struct{}, logger log.Logg
|
||||||
select {
|
select {
|
||||||
case <-success:
|
case <-success:
|
||||||
// Success
|
// Success
|
||||||
|
// If the mount has been marked as stuck, unmark it and log its recovery.
|
||||||
|
stuckMountsMtx.Lock()
|
||||||
|
defer stuckMountsMtx.Unlock()
|
||||||
|
if _, ok := stuckMounts[mountPoint]; ok {
|
||||||
|
level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint)
|
||||||
|
delete(stuckMounts, mountPoint)
|
||||||
|
}
|
||||||
case <-mountCheckTimer.C:
|
case <-mountCheckTimer.C:
|
||||||
// Timed out, mark mount as stuck
|
// Timed out, mark mount as stuck
|
||||||
stuckMountsMtx.Lock()
|
stuckMountsMtx.Lock()
|
||||||
|
defer stuckMountsMtx.Unlock()
|
||||||
select {
|
select {
|
||||||
case <-success:
|
case <-success:
|
||||||
// Success came in just after the timeout was reached, don't label the mount as stuck
|
// Success came in just after the timeout was reached, don't label the mount as stuck
|
||||||
|
// If the mount has been marked as stuck, unmark it and log its recovery.
|
||||||
|
if _, ok := stuckMounts[mountPoint]; ok {
|
||||||
|
level.Debug(logger).Log("msg", "Mount point has recovered, monitoring will resume", "mountpoint", mountPoint)
|
||||||
|
delete(stuckMounts, mountPoint)
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
level.Debug(logger).Log("msg", "Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint)
|
level.Debug(logger).Log("msg", "Mount point timed out, it is being labeled as stuck and will not be monitored", "mountpoint", mountPoint)
|
||||||
stuckMounts[mountPoint] = struct{}{}
|
stuckMounts[mountPoint] = struct{}{}
|
||||||
}
|
}
|
||||||
stuckMountsMtx.Unlock()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue