filesystem: surface device errors (#2923)

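filesystem: surface the filesystem device error as a new `device_error` label on the filesystem metrics (the statfs() error text, or "mountpoint timeout" for a stuck mount; empty when there is no error)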

Fixes: #2918
---------

Signed-off-by: Pamela Mei i540369 <pamela.mei@sap.com>
This commit is contained in:
Pamela Mei 2024-02-18 19:04:30 +08:00 committed by GitHub
parent 09014c0c5c
commit 12192475c8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 16 additions and 13 deletions

View file

@ -60,7 +60,7 @@ var (
"Regexp of filesystem types to ignore for filesystem collector.", "Regexp of filesystem types to ignore for filesystem collector.",
).Hidden().String() ).Hidden().String()
filesystemLabelNames = []string{"device", "mountpoint", "fstype"} filesystemLabelNames = []string{"device", "mountpoint", "fstype", "device_error"}
) )
type filesystemCollector struct { type filesystemCollector struct {
@ -73,7 +73,7 @@ type filesystemCollector struct {
} }
type filesystemLabels struct { type filesystemLabels struct {
device, mountPoint, fsType, options string device, mountPoint, fsType, options, deviceError string
} }
type filesystemStats struct { type filesystemStats struct {
@ -184,11 +184,11 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error {
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.deviceErrorDesc, prometheus.GaugeValue, c.deviceErrorDesc, prometheus.GaugeValue,
s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError,
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.roDesc, prometheus.GaugeValue, c.roDesc, prometheus.GaugeValue,
s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError,
) )
if s.deviceError > 0 { if s.deviceError > 0 {
@ -197,23 +197,23 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error {
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.sizeDesc, prometheus.GaugeValue, c.sizeDesc, prometheus.GaugeValue,
s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError,
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.freeDesc, prometheus.GaugeValue, c.freeDesc, prometheus.GaugeValue,
s.free, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.free, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError,
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.availDesc, prometheus.GaugeValue, c.availDesc, prometheus.GaugeValue,
s.avail, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.avail, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError,
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.filesDesc, prometheus.GaugeValue, c.filesDesc, prometheus.GaugeValue,
s.files, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.files, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError,
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.filesFreeDesc, prometheus.GaugeValue, c.filesFreeDesc, prometheus.GaugeValue,
s.filesFree, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.filesFree, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError,
) )
} }
return nil return nil

View file

@ -85,6 +85,7 @@ func (c *filesystemCollector) GetStats() ([]filesystemStats, error) {
stuckMountsMtx.Lock() stuckMountsMtx.Lock()
if _, ok := stuckMounts[labels.mountPoint]; ok { if _, ok := stuckMounts[labels.mountPoint]; ok {
labels.deviceError = "mountpoint timeout"
stats = append(stats, filesystemStats{ stats = append(stats, filesystemStats{
labels: labels, labels: labels,
deviceError: 1, deviceError: 1,
@ -125,6 +126,7 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta
close(success) close(success)
if err != nil { if err != nil {
labels.deviceError = err.Error()
level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err)
return filesystemStats{ return filesystemStats{
labels: labels, labels: labels,
@ -211,10 +213,11 @@ func parseFilesystemLabels(r io.Reader) ([]filesystemLabels, error) {
parts[1] = strings.Replace(parts[1], "\\011", "\t", -1) parts[1] = strings.Replace(parts[1], "\\011", "\t", -1)
filesystems = append(filesystems, filesystemLabels{ filesystems = append(filesystems, filesystemLabels{
device: parts[0], device: parts[0],
mountPoint: rootfsStripPrefix(parts[1]), mountPoint: rootfsStripPrefix(parts[1]),
fsType: parts[2], fsType: parts[2],
options: parts[3], options: parts[3],
deviceError: "",
}) })
} }