mirror of
https://github.com/prometheus/node_exporter.git
synced 2024-11-14 09:34:23 -08:00
690efa61e8
Log a single error message when the udev data directory (`/run/udev/data` by default) is unreadable, and then don't try to get device properties out of it. Also lower the log level from error to debug when we can't parse the udev files properly, since these messages would be sent every time the node exporter gets scraped. Signed-off-by: Benoît Knecht <bknecht@protonmail.ch>
399 lines
12 KiB
Go
399 lines
12 KiB
Go
// Copyright 2015 The Prometheus Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
//go:build !nodiskstats
|
|
// +build !nodiskstats
|
|
|
|
package collector
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/go-kit/log"
|
|
"github.com/go-kit/log/level"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/procfs/blockdevice"
|
|
)
|
|
|
|
// Conversion factors and defaults used when exporting diskstats values.
const (
	// secondsPerTick is the multiplier applied to the tick counters read from
	// diskstats so that they are exported in seconds (1/1000 s per tick).
	secondsPerTick = 1.0 / 1000.0

	// Read sectors and write sectors are the "standard UNIX 512-byte sectors, not any device- or filesystem-specific block size."
	// See also https://www.kernel.org/doc/Documentation/block/stat.txt
	unixSectorSize = 512.0

	// diskstatsDefaultIgnoredDevices is the default regular expression for
	// device names that are excluded from collection.
	diskstatsDefaultIgnoredDevices = "^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$"
)

// Names used when reading udev data files.
const (
	// udevDevicePropertyPrefix marks the lines of a udev data file that carry
	// device properties. See udevadm(8).
	udevDevicePropertyPrefix = "E:"

	// Udev device properties.
	udevDMLVLayer               = "DM_LV_LAYER"
	udevDMLVName                = "DM_LV_NAME"
	udevDMName                  = "DM_NAME"
	udevDMUUID                  = "DM_UUID"
	udevDMVGName                = "DM_VG_NAME"
	udevIDATA                   = "ID_ATA"
	udevIDATARotationRateRPM    = "ID_ATA_ROTATION_RATE_RPM"
	udevIDATASATA               = "ID_ATA_SATA"
	udevIDATASATASignalRateGen1 = "ID_ATA_SATA_SIGNAL_RATE_GEN1"
	udevIDATASATASignalRateGen2 = "ID_ATA_SATA_SIGNAL_RATE_GEN2"
	udevIDATAWriteCache         = "ID_ATA_WRITE_CACHE"
	udevIDATAWriteCacheEnabled  = "ID_ATA_WRITE_CACHE_ENABLED"
	udevIDFSType                = "ID_FS_TYPE"
	udevIDFSUsage               = "ID_FS_USAGE"
	udevIDFSUUID                = "ID_FS_UUID"
	udevIDFSVersion             = "ID_FS_VERSION"
	udevIDModel                 = "ID_MODEL"
	udevIDPath                  = "ID_PATH"
	udevIDRevision              = "ID_REVISION"
	udevIDSerialShort           = "ID_SERIAL_SHORT"
	udevIDWWN                   = "ID_WWN"
)
|
|
|
|
type typedFactorDesc struct {
|
|
desc *prometheus.Desc
|
|
valueType prometheus.ValueType
|
|
}
|
|
|
|
// udevInfo holds udev device properties as a name-to-value map.
type udevInfo map[string]string
|
|
|
|
func (d *typedFactorDesc) mustNewConstMetric(value float64, labels ...string) prometheus.Metric {
|
|
return prometheus.MustNewConstMetric(d.desc, d.valueType, value, labels...)
|
|
}
|
|
|
|
type diskstatsCollector struct {
|
|
deviceFilter deviceFilter
|
|
fs blockdevice.FS
|
|
infoDesc typedFactorDesc
|
|
descs []typedFactorDesc
|
|
filesystemInfoDesc typedFactorDesc
|
|
deviceMapperInfoDesc typedFactorDesc
|
|
ataDescs map[string]typedFactorDesc
|
|
logger log.Logger
|
|
getUdevDeviceProperties func(uint32, uint32) (udevInfo, error)
|
|
}
|
|
|
|
func init() {
|
|
registerCollector("diskstats", defaultEnabled, NewDiskstatsCollector)
|
|
}
|
|
|
|
// NewDiskstatsCollector returns a new Collector exposing disk device stats.
|
|
// Docs from https://www.kernel.org/doc/Documentation/iostats.txt
|
|
func NewDiskstatsCollector(logger log.Logger) (Collector, error) {
|
|
var diskLabelNames = []string{"device"}
|
|
fs, err := blockdevice.NewFS(*procPath, *sysPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to open sysfs: %w", err)
|
|
}
|
|
|
|
deviceFilter, err := newDiskstatsDeviceFilter(logger)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse device filter flags: %w", err)
|
|
}
|
|
|
|
collector := diskstatsCollector{
|
|
deviceFilter: deviceFilter,
|
|
fs: fs,
|
|
infoDesc: typedFactorDesc{
|
|
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "info"),
|
|
"Info of /sys/block/<block_device>.",
|
|
[]string{"device", "major", "minor", "path", "wwn", "model", "serial", "revision"},
|
|
nil,
|
|
), valueType: prometheus.GaugeValue,
|
|
},
|
|
descs: []typedFactorDesc{
|
|
{
|
|
desc: readsCompletedDesc, valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, diskSubsystem, "reads_merged_total"),
|
|
"The total number of reads merged.",
|
|
diskLabelNames,
|
|
nil,
|
|
), valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: readBytesDesc, valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: readTimeSecondsDesc, valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: writesCompletedDesc, valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, diskSubsystem, "writes_merged_total"),
|
|
"The number of writes merged.",
|
|
diskLabelNames,
|
|
nil,
|
|
), valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: writtenBytesDesc, valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: writeTimeSecondsDesc, valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, diskSubsystem, "io_now"),
|
|
"The number of I/Os currently in progress.",
|
|
diskLabelNames,
|
|
nil,
|
|
), valueType: prometheus.GaugeValue,
|
|
},
|
|
{
|
|
desc: ioTimeSecondsDesc, valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, diskSubsystem, "io_time_weighted_seconds_total"),
|
|
"The weighted # of seconds spent doing I/Os.",
|
|
diskLabelNames,
|
|
nil,
|
|
), valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, diskSubsystem, "discards_completed_total"),
|
|
"The total number of discards completed successfully.",
|
|
diskLabelNames,
|
|
nil,
|
|
), valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, diskSubsystem, "discards_merged_total"),
|
|
"The total number of discards merged.",
|
|
diskLabelNames,
|
|
nil,
|
|
), valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, diskSubsystem, "discarded_sectors_total"),
|
|
"The total number of sectors discarded successfully.",
|
|
diskLabelNames,
|
|
nil,
|
|
), valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, diskSubsystem, "discard_time_seconds_total"),
|
|
"This is the total number of seconds spent by all discards.",
|
|
diskLabelNames,
|
|
nil,
|
|
), valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, diskSubsystem, "flush_requests_total"),
|
|
"The total number of flush requests completed successfully",
|
|
diskLabelNames,
|
|
nil,
|
|
), valueType: prometheus.CounterValue,
|
|
},
|
|
{
|
|
desc: prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, diskSubsystem, "flush_requests_time_seconds_total"),
|
|
"This is the total number of seconds spent by all flush requests.",
|
|
diskLabelNames,
|
|
nil,
|
|
), valueType: prometheus.CounterValue,
|
|
},
|
|
},
|
|
filesystemInfoDesc: typedFactorDesc{
|
|
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "filesystem_info"),
|
|
"Info about disk filesystem.",
|
|
[]string{"device", "type", "usage", "uuid", "version"},
|
|
nil,
|
|
), valueType: prometheus.GaugeValue,
|
|
},
|
|
deviceMapperInfoDesc: typedFactorDesc{
|
|
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "device_mapper_info"),
|
|
"Info about disk device mapper.",
|
|
[]string{"device", "name", "uuid", "vg_name", "lv_name", "lv_layer"},
|
|
nil,
|
|
), valueType: prometheus.GaugeValue,
|
|
},
|
|
ataDescs: map[string]typedFactorDesc{
|
|
udevIDATAWriteCache: {
|
|
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ata_write_cache"),
|
|
"ATA disk has a write cache.",
|
|
[]string{"device"},
|
|
nil,
|
|
), valueType: prometheus.GaugeValue,
|
|
},
|
|
udevIDATAWriteCacheEnabled: {
|
|
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ata_write_cache_enabled"),
|
|
"ATA disk has its write cache enabled.",
|
|
[]string{"device"},
|
|
nil,
|
|
), valueType: prometheus.GaugeValue,
|
|
},
|
|
udevIDATARotationRateRPM: {
|
|
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ata_rotation_rate_rpm"),
|
|
"ATA disk rotation rate in RPMs (0 for SSDs).",
|
|
[]string{"device"},
|
|
nil,
|
|
), valueType: prometheus.GaugeValue,
|
|
},
|
|
},
|
|
logger: logger,
|
|
}
|
|
|
|
// Only enable getting device properties from udev if the directory is readable.
|
|
if stat, err := os.Stat(*udevDataPath); err != nil || !stat.IsDir() {
|
|
level.Error(logger).Log("msg", "Failed to open directory, disabling udev device properties", "path", *udevDataPath)
|
|
} else {
|
|
collector.getUdevDeviceProperties = getUdevDeviceProperties
|
|
}
|
|
|
|
return &collector, nil
|
|
}
|
|
|
|
func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
|
|
diskStats, err := c.fs.ProcDiskstats()
|
|
if err != nil {
|
|
return fmt.Errorf("couldn't get diskstats: %w", err)
|
|
}
|
|
|
|
for _, stats := range diskStats {
|
|
dev := stats.DeviceName
|
|
if c.deviceFilter.ignored(dev) {
|
|
continue
|
|
}
|
|
|
|
info, err := getUdevDeviceProperties(stats.MajorNumber, stats.MinorNumber)
|
|
if err != nil {
|
|
level.Debug(c.logger).Log("msg", "Failed to parse udev info", "err", err)
|
|
}
|
|
|
|
ch <- c.infoDesc.mustNewConstMetric(1.0, dev,
|
|
fmt.Sprint(stats.MajorNumber),
|
|
fmt.Sprint(stats.MinorNumber),
|
|
info[udevIDPath],
|
|
info[udevIDWWN],
|
|
info[udevIDModel],
|
|
info[udevIDSerialShort],
|
|
info[udevIDRevision],
|
|
)
|
|
|
|
statCount := stats.IoStatsCount - 3 // Total diskstats record count, less MajorNumber, MinorNumber and DeviceName
|
|
|
|
for i, val := range []float64{
|
|
float64(stats.ReadIOs),
|
|
float64(stats.ReadMerges),
|
|
float64(stats.ReadSectors) * unixSectorSize,
|
|
float64(stats.ReadTicks) * secondsPerTick,
|
|
float64(stats.WriteIOs),
|
|
float64(stats.WriteMerges),
|
|
float64(stats.WriteSectors) * unixSectorSize,
|
|
float64(stats.WriteTicks) * secondsPerTick,
|
|
float64(stats.IOsInProgress),
|
|
float64(stats.IOsTotalTicks) * secondsPerTick,
|
|
float64(stats.WeightedIOTicks) * secondsPerTick,
|
|
float64(stats.DiscardIOs),
|
|
float64(stats.DiscardMerges),
|
|
float64(stats.DiscardSectors),
|
|
float64(stats.DiscardTicks) * secondsPerTick,
|
|
float64(stats.FlushRequestsCompleted),
|
|
float64(stats.TimeSpentFlushing) * secondsPerTick,
|
|
} {
|
|
if i >= statCount {
|
|
break
|
|
}
|
|
ch <- c.descs[i].mustNewConstMetric(val, dev)
|
|
}
|
|
|
|
if fsType := info[udevIDFSType]; fsType != "" {
|
|
ch <- c.filesystemInfoDesc.mustNewConstMetric(1.0, dev,
|
|
fsType,
|
|
info[udevIDFSUsage],
|
|
info[udevIDFSUUID],
|
|
info[udevIDFSVersion],
|
|
)
|
|
}
|
|
|
|
if name := info[udevDMName]; name != "" {
|
|
ch <- c.deviceMapperInfoDesc.mustNewConstMetric(1.0, dev,
|
|
name,
|
|
info[udevDMUUID],
|
|
info[udevDMVGName],
|
|
info[udevDMLVName],
|
|
info[udevDMLVLayer],
|
|
)
|
|
}
|
|
|
|
if ata := info[udevIDATA]; ata != "" {
|
|
for attr, desc := range c.ataDescs {
|
|
str, ok := info[attr]
|
|
if !ok {
|
|
level.Debug(c.logger).Log("msg", "Udev attribute does not exist", "attribute", attr)
|
|
continue
|
|
}
|
|
|
|
if value, err := strconv.ParseFloat(str, 64); err == nil {
|
|
ch <- desc.mustNewConstMetric(value, dev)
|
|
} else {
|
|
level.Error(c.logger).Log("msg", "Failed to parse ATA value", "err", err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func getUdevDeviceProperties(major, minor uint32) (udevInfo, error) {
|
|
filename := udevDataFilePath(fmt.Sprintf("b%d:%d", major, minor))
|
|
|
|
data, err := os.Open(filename)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer data.Close()
|
|
|
|
info := make(udevInfo)
|
|
|
|
scanner := bufio.NewScanner(data)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
|
|
// We're only interested in device properties.
|
|
if !strings.HasPrefix(line, udevDevicePropertyPrefix) {
|
|
continue
|
|
}
|
|
|
|
line = strings.TrimPrefix(line, udevDevicePropertyPrefix)
|
|
|
|
/* TODO: After we drop support for Go 1.17, the condition below can be simplified to:
|
|
|
|
if name, value, found := strings.Cut(line, "="); found {
|
|
info[name] = value
|
|
}
|
|
*/
|
|
if fields := strings.SplitN(line, "=", 2); len(fields) == 2 {
|
|
info[fields[0]] = fields[1]
|
|
}
|
|
}
|
|
|
|
return info, nil
|
|
}
|