diff --git a/collector/diskstats_linux.go b/collector/diskstats_linux.go index ad6ff3cb..957aa0bf 100644 --- a/collector/diskstats_linux.go +++ b/collector/diskstats_linux.go @@ -21,6 +21,7 @@ import ( "fmt" "log/slog" "os" + "path/filepath" "strconv" "strings" @@ -84,6 +85,8 @@ type diskstatsCollector struct { filesystemInfoDesc typedFactorDesc deviceMapperInfoDesc typedFactorDesc ataDescs map[string]typedFactorDesc + ioErrDesc typedFactorDesc + ioDoneDesc typedFactorDesc logger *slog.Logger getUdevDeviceProperties func(uint32, uint32) (udevInfo, error) } @@ -256,6 +259,20 @@ func NewDiskstatsCollector(logger *slog.Logger) (Collector, error) { ), valueType: prometheus.GaugeValue, }, }, + ioErrDesc: typedFactorDesc{ + desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ioerr_total"), + "Number of IO commands that completed with an error.", + []string{"device"}, + nil, + ), valueType: prometheus.CounterValue, + }, + ioDoneDesc: typedFactorDesc{ + desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "iodone_total"), + "Number of completed or rejected IO commands.", + []string{"device"}, + nil, + ), valueType: prometheus.CounterValue, + }, logger: logger, } @@ -372,6 +389,37 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error { } } } + + // Read IO error counts if available + iodoneCnt, err := os.ReadFile(filepath.Join(*sysPath, "block", dev, "device/iodone_cnt")) + if err != nil { + // Skip if file doesn't exist + if !os.IsNotExist(err) { + c.logger.Debug("Error reading IO errors count", "collector", "diskstats", "err", err) + } + } else { + iodone, err := strconv.ParseUint(strings.TrimSpace(string(iodoneCnt)), 10, 64) + if err != nil { + c.logger.Debug("Error parsing iodone count", "collector", "diskstats", "err", err) + } else { + ch <- c.ioDoneDesc.mustNewConstMetric(float64(iodone), dev) + } + } + + ioerrCnt, err := os.ReadFile(filepath.Join(*sysPath, "block", dev, "device/ioerr_cnt")) + if err != nil { + // Skip if file doesn't exist + if !os.IsNotExist(err) { + c.logger.Debug("Error reading IO errors count", "collector", "diskstats", "err", err) + } + } else { + ioerr, err := strconv.ParseUint(strings.TrimSpace(string(ioerrCnt)), 10, 64) + if err != nil { + c.logger.Debug("Error parsing ioerr count", "collector", "diskstats", "err", err) + } else { + ch <- c.ioErrDesc.mustNewConstMetric(float64(ioerr), dev) + } + } } return nil } diff --git a/collector/diskstats_linux_test.go b/collector/diskstats_linux_test.go index fd90353f..53ec0184 100644 --- a/collector/diskstats_linux_test.go +++ b/collector/diskstats_linux_test.go @@ -179,6 +179,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001 node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07 node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 +# HELP node_disk_iodone_total Number of completed or rejected IO commands. +# TYPE node_disk_iodone_total counter +node_disk_iodone_total{device="sda"} 307 +node_disk_iodone_total{device="sr0"} 2767 +# HELP node_disk_ioerr_total Number of IO commands that completed with an error. +# TYPE node_disk_ioerr_total counter +node_disk_ioerr_total{device="sda"} 3 +node_disk_ioerr_total{device="sr0"} 29 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 diff --git a/collector/ext4_linux.go b/collector/ext4_linux.go new file mode 100644 index 00000000..6ae591b2 --- /dev/null +++ b/collector/ext4_linux.go @@ -0,0 +1,110 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noext4 +// +build !noext4 + +package collector + +import ( + "fmt" + "log/slog" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/procfs/ext4" +) + +// An ext4Collector is a Collector which gathers metrics from ext4 filesystems. +type ext4Collector struct { + fs ext4.FS + logger *slog.Logger +} + +func init() { + registerCollector("ext4", defaultEnabled, NewExt4Collector) +} + +// NewExt4Collector returns a new Collector exposing ext4 statistics. +func NewExt4Collector(logger *slog.Logger) (Collector, error) { + fs, err := ext4.NewFS(*procPath, *sysPath) + if err != nil { + return nil, fmt.Errorf("failed to open sysfs: %w", err) + } + + return &ext4Collector{ + fs: fs, + logger: logger, + }, nil +} + +// Update implements Collector. +func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error { + stats, err := c.fs.ProcStat() + if err != nil { + return fmt.Errorf("failed to retrieve ext4 stats: %w", err) + } + + for _, s := range stats { + c.updateExt4Stats(ch, s) + } + + return nil +} + +// updateExt4Stats collects statistics for a single ext4 filesystem. +func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Stats) { + const ( + subsystem = "ext4" + ) + var ( + labels = []string{"device"} + ) + + metrics := []struct { + name string + desc string + value float64 + }{ + { + name: "errors", + desc: "Number of ext4 filesystem errors.", + value: float64(s.Errors), + }, + { + name: "warnings", + desc: "Number of ext4 filesystem warnings.", + value: float64(s.Warnings), + }, + { + name: "messages", + desc: "Number of ext4 filesystem log messages.", + value: float64(s.Messages), + }, + } + + for _, m := range metrics { + desc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, m.name), + m.desc, + labels, + nil, + ) + + ch <- prometheus.MustNewConstMetric( + desc, + prometheus.CounterValue, + m.value, + s.Name, + ) + } +} diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 1914288e..f793fed9 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -554,6 +554,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001 node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07 node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 +# HELP node_disk_iodone_total Number of completed or rejected IO commands. +# TYPE node_disk_iodone_total counter +node_disk_iodone_total{device="sda"} 307 +node_disk_iodone_total{device="sr0"} 2767 +# HELP node_disk_ioerr_total Number of IO commands that completed with an error. +# TYPE node_disk_ioerr_total counter +node_disk_ioerr_total{device="sda"} 3 +node_disk_ioerr_total{device="sr0"} 29 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 @@ -2991,6 +2999,7 @@ node_scrape_collector_success{collector="dmi"} 1 node_scrape_collector_success{collector="drbd"} 1 node_scrape_collector_success{collector="edac"} 1 node_scrape_collector_success{collector="entropy"} 1 +node_scrape_collector_success{collector="ext4"} 1 node_scrape_collector_success{collector="fibrechannel"} 1 node_scrape_collector_success{collector="filefd"} 1 node_scrape_collector_success{collector="hwmon"} 1 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 634386da..f1c454a1 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -576,6 +576,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001 node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07 node_disk_io_time_weighted_seconds_total{device="sr0"} 0 node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06 +# HELP node_disk_iodone_total Number of completed or rejected IO commands. +# TYPE node_disk_iodone_total counter +node_disk_iodone_total{device="sda"} 307 +node_disk_iodone_total{device="sr0"} 2767 +# HELP node_disk_ioerr_total Number of IO commands that completed with an error. +# TYPE node_disk_ioerr_total counter +node_disk_ioerr_total{device="sda"} 3 +node_disk_ioerr_total{device="sr0"} 29 # HELP node_disk_read_bytes_total The total number of bytes read successfully. # TYPE node_disk_read_bytes_total counter node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11 @@ -3013,6 +3021,7 @@ node_scrape_collector_success{collector="dmi"} 1 node_scrape_collector_success{collector="drbd"} 1 node_scrape_collector_success{collector="edac"} 1 node_scrape_collector_success{collector="entropy"} 1 +node_scrape_collector_success{collector="ext4"} 1 node_scrape_collector_success{collector="fibrechannel"} 1 node_scrape_collector_success{collector="filefd"} 1 node_scrape_collector_success{collector="hwmon"} 1 diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index a0e653e2..d27b4d6b 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -569,6 +569,32 @@ Lines: 1 in_sync Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/block/sda/device +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sda/device/iodone_cnt +Lines: 1 +307 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sda/device/ioerr_cnt +Lines: 1 +3 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: sys/block/sr0/device +Mode: 755 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sr0/device/iodone_cnt +Lines: 1 +2767 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/block/sr0/device/ioerr_cnt +Lines: 1 +29 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/block/md6/md/rd3 Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/end-to-end-test.sh b/end-to-end-test.sh index 0bd988ef..7992df6c 100755 --- a/end-to-end-test.sh +++ b/end-to-end-test.sh @@ -50,6 +50,7 @@ enabled_collectors=$(cat << COLLECTORS drbd edac entropy + ext4 fibrechannel filefd hwmon