Add FS and Disk IO stats

Signed-off-by: Muhammad Shahzeb <mhmdshahzeb1993@gmail.com>
This commit is contained in:
Muhammad Shahzeb 2025-04-01 05:15:50 +05:00
parent 38d32a3977
commit e902d2a5ba
7 changed files with 211 additions and 0 deletions

View file

@ -21,6 +21,7 @@ import (
"fmt"
"log/slog"
"os"
"path/filepath"
"strconv"
"strings"
@ -84,6 +85,8 @@ type diskstatsCollector struct {
filesystemInfoDesc typedFactorDesc
deviceMapperInfoDesc typedFactorDesc
ataDescs map[string]typedFactorDesc
ioErrDesc typedFactorDesc
ioDoneDesc typedFactorDesc
logger *slog.Logger
getUdevDeviceProperties func(uint32, uint32) (udevInfo, error)
}
@ -256,6 +259,20 @@ func NewDiskstatsCollector(logger *slog.Logger) (Collector, error) {
), valueType: prometheus.GaugeValue,
},
},
ioErrDesc: typedFactorDesc{
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ioerr_total"),
"Number of IO commands that completed with an error.",
[]string{"device"},
nil,
), valueType: prometheus.CounterValue,
},
ioDoneDesc: typedFactorDesc{
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "iodone_total"),
"Number of completed or rejected IO commands.",
[]string{"device"},
nil,
), valueType: prometheus.CounterValue,
},
logger: logger,
}
@ -372,6 +389,37 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
}
}
}
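// Read the SCSI I/O completion (iodone_cnt) and error (ioerr_cnt) counters from sysfs, if the device exposes them.
// NOTE(review): the kernel's SCSI sysfs code prints these counters as 0x-prefixed hex — confirm that parsing them as base 10 below is intended.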
iodoneCnt, err := os.ReadFile(filepath.Join(*sysPath, "block", dev, "device/iodone_cnt"))
if err != nil {
// Skip if file doesn't exist
if !os.IsNotExist(err) {
c.logger.Debug("Error reading IO errors count", "collector", "diskstats", "err", err)
}
} else {
iodone, err := strconv.ParseUint(strings.TrimSpace(string(iodoneCnt)), 10, 64)
if err != nil {
c.logger.Debug("Error parsing iodone count", "collector", "diskstats", "err", err)
} else {
ch <- c.ioDoneDesc.mustNewConstMetric(float64(iodone), dev)
}
}
ioerrCnt, err := os.ReadFile(filepath.Join(*sysPath, "block", dev, "device/ioerr_cnt"))
if err != nil {
// Skip if file doesn't exist
if !os.IsNotExist(err) {
c.logger.Debug("Error reading IO errors count", "collector", "diskstats", "err", err)
}
} else {
ioerr, err := strconv.ParseUint(strings.TrimSpace(string(ioerrCnt)), 10, 64)
if err != nil {
c.logger.Debug("Error parsing ioerr count", "collector", "diskstats", "err", err)
} else {
ch <- c.ioErrDesc.mustNewConstMetric(float64(ioerr), dev)
}
}
}
return nil
}

View file

@ -179,6 +179,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
node_disk_io_time_weighted_seconds_total{device="sr0"} 0
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
# HELP node_disk_iodone_total Number of completed or rejected IO commands.
# TYPE node_disk_iodone_total counter
node_disk_iodone_total{device="sda"} 307
node_disk_iodone_total{device="sr0"} 2767
# HELP node_disk_ioerr_total Number of IO commands that completed with an error.
# TYPE node_disk_ioerr_total counter
node_disk_ioerr_total{device="sda"} 3
node_disk_ioerr_total{device="sr0"} 29
# HELP node_disk_read_bytes_total The total number of bytes read successfully.
# TYPE node_disk_read_bytes_total counter
node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11

110
collector/ext4_linux.go Normal file
View file

@ -0,0 +1,110 @@
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !noext4
// +build !noext4
package collector
import (
"fmt"
"log/slog"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs/ext4"
)
// An ext4Collector is a Collector which gathers metrics from ext4 filesystems.
// Instances are created by NewExt4Collector.
type ext4Collector struct {
// fs is the ext4.FS handle returned by ext4.NewFS; Update calls its
// ProcStat method on every scrape.
fs ext4.FS
// logger is the logger handed to NewExt4Collector. No method in this file
// currently reads it.
logger *slog.Logger
}
// init registers the collector under the name "ext4" with the defaultEnabled
// state, using NewExt4Collector as its constructor.
func init() {
registerCollector("ext4", defaultEnabled, NewExt4Collector)
}
// NewExt4Collector builds the ext4 Collector. It opens an ext4.FS using the
// configured procfs and sysfs mount points and returns a wrapped error when
// that fails.
func NewExt4Collector(logger *slog.Logger) (Collector, error) {
	ext4FS, openErr := ext4.NewFS(*procPath, *sysPath)
	if openErr != nil {
		return nil, fmt.Errorf("failed to open sysfs: %w", openErr)
	}

	collector := &ext4Collector{logger: logger, fs: ext4FS}
	return collector, nil
}
// Update implements Collector. It fetches the statistics of every ext4
// filesystem via ProcStat and emits the metrics of each one on ch. A failure
// to fetch the statistics is returned wrapped; no metrics are sent in that case.
func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error {
	allStats, statErr := c.fs.ProcStat()
	if statErr != nil {
		return fmt.Errorf("failed to retrieve ext4 stats: %w", statErr)
	}

	for i := range allStats {
		c.updateExt4Stats(ch, allStats[i])
	}

	return nil
}
// updateExt4Stats emits the error, warning and log-message counters of a
// single ext4 filesystem on ch. Every metric carries one "device" label whose
// value is s.Name.
//
// All three metrics are exported with prometheus.CounterValue, so their names
// end in "_total" as the Prometheus naming conventions require for counters.
func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Stats) {
	const subsystem = "ext4"

	labels := []string{"device"}

	metrics := []struct {
		name  string
		desc  string
		value float64
	}{
		{
			name:  "errors_total",
			desc:  "Number of ext4 filesystem errors.",
			value: float64(s.Errors),
		},
		{
			name:  "warnings_total",
			desc:  "Number of ext4 filesystem warnings.",
			value: float64(s.Warnings),
		},
		{
			name:  "messages_total",
			desc:  "Number of ext4 filesystem log messages.",
			value: float64(s.Messages),
		},
	}

	for _, m := range metrics {
		desc := prometheus.NewDesc(
			prometheus.BuildFQName(namespace, subsystem, m.name),
			m.desc,
			labels,
			nil,
		)

		ch <- prometheus.MustNewConstMetric(
			desc,
			prometheus.CounterValue,
			m.value,
			s.Name,
		)
	}
}

View file

@ -554,6 +554,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
node_disk_io_time_weighted_seconds_total{device="sr0"} 0
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
# HELP node_disk_iodone_total Number of completed or rejected IO commands.
# TYPE node_disk_iodone_total counter
node_disk_iodone_total{device="sda"} 307
node_disk_iodone_total{device="sr0"} 2767
# HELP node_disk_ioerr_total Number of IO commands that completed with an error.
# TYPE node_disk_ioerr_total counter
node_disk_ioerr_total{device="sda"} 3
node_disk_ioerr_total{device="sr0"} 29
# HELP node_disk_read_bytes_total The total number of bytes read successfully.
# TYPE node_disk_read_bytes_total counter
node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11
@ -2971,6 +2979,7 @@ node_scrape_collector_success{collector="dmi"} 1
node_scrape_collector_success{collector="drbd"} 1
node_scrape_collector_success{collector="edac"} 1
node_scrape_collector_success{collector="entropy"} 1
node_scrape_collector_success{collector="ext4"} 1
node_scrape_collector_success{collector="fibrechannel"} 1
node_scrape_collector_success{collector="filefd"} 1
node_scrape_collector_success{collector="hwmon"} 1

View file

@ -576,6 +576,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
node_disk_io_time_weighted_seconds_total{device="sr0"} 0
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
# HELP node_disk_iodone_total Number of completed or rejected IO commands.
# TYPE node_disk_iodone_total counter
node_disk_iodone_total{device="sda"} 307
node_disk_iodone_total{device="sr0"} 2767
# HELP node_disk_ioerr_total Number of IO commands that completed with an error.
# TYPE node_disk_ioerr_total counter
node_disk_ioerr_total{device="sda"} 3
node_disk_ioerr_total{device="sr0"} 29
# HELP node_disk_read_bytes_total The total number of bytes read successfully.
# TYPE node_disk_read_bytes_total counter
node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11
@ -2993,6 +3001,7 @@ node_scrape_collector_success{collector="dmi"} 1
node_scrape_collector_success{collector="drbd"} 1
node_scrape_collector_success{collector="edac"} 1
node_scrape_collector_success{collector="entropy"} 1
node_scrape_collector_success{collector="ext4"} 1
node_scrape_collector_success{collector="fibrechannel"} 1
node_scrape_collector_success{collector="filefd"} 1
node_scrape_collector_success{collector="hwmon"} 1

View file

@ -803,6 +803,32 @@ Lines: 1
in_sync
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sda/device
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sda/device/iodone_cnt
Lines: 1
307
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sda/device/ioerr_cnt
Lines: 1
3
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sr0/device
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sr0/device/iodone_cnt
Lines: 1
2767
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sr0/device/ioerr_cnt
Lines: 1
29
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/md6/md/rd3
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

View file

@ -50,6 +50,7 @@ enabled_collectors=$(cat << COLLECTORS
drbd
edac
entropy
ext4
fibrechannel
filefd
hwmon