From e00120773351408fd47c840c3519b2b307f3f41c Mon Sep 17 00:00:00 2001 From: Scott Laird Date: Mon, 1 Jan 2024 11:45:49 -0800 Subject: [PATCH] Add Linux SAS metrics This adds metrics for SAS (Serial-Attached SCSI) devices. Issue prometheus#2386 --- collector/sas_linux.go | 215 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 collector/sas_linux.go diff --git a/collector/sas_linux.go b/collector/sas_linux.go new file mode 100644 index 00000000..32cd21c0 --- /dev/null +++ b/collector/sas_linux.go @@ -0,0 +1,215 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !nocpu +// +build !nocpu + +package collector + +// Exported metrics, from /sys/class/sas_phy//*: +// +// - invalid_dword_count +// - loss_of_dword_sync_count +// - negotiated_linkrate +// - phy_reset_problem_count +// - running_disparity_error_count +// +// Four of these are counters, one is a gauge. + +import ( + "fmt" + + "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/procfs/sysfs" +) + +type sasCollector struct { + fs sysfs.FS + + sasInvalidDword *prometheus.Desc + sasLossOfDwordSync *prometheus.Desc + sasNegotiatedLinkrate *prometheus.Desc + sasPhyResetProblem *prometheus.Desc + sasRunningDisparityError *prometheus.Desc + + logger log.Logger +} + +const sasCollectorSubsystem = "sas" + +func init() { + registerCollector("sas", defaultEnabled, NewSASCollector) +} + +// NewSASCollector returns a new Collector exposing SAS storage statistics. +func NewSASCollector(logger log.Logger) (Collector, error) { + fs, err := sysfs.NewFS(*sysPath) + if err != nil { + return nil, fmt.Errorf("failed to open procfs: %w", err) + } + c := &sasCollector{ + fs: fs, + sasInvalidDword: prometheus.NewDesc( + prometheus.BuildFQName(namespace, sasCollectorSubsystem, "phy_invalid_dword"), + "SAS PHY count of invalid dwords.", + []string{"phy", "port", "host", "expander", "block_device", "sas_address"}, nil, + ), + sasLossOfDwordSync: prometheus.NewDesc( + prometheus.BuildFQName(namespace, sasCollectorSubsystem, "phy_loss_of_dword_sync"), + "SAS PHY count of lost dword sync.", + []string{"phy", "port", "host", "expander", "block_device", "sas_address"}, nil, + ), + sasNegotiatedLinkrate: prometheus.NewDesc( + prometheus.BuildFQName(namespace, sasCollectorSubsystem, "phy_negotiated_linkrate"), + "SAS PHY negotiated link rate in Gbps.", + []string{"phy", "port", "host", "expander", "block_device", "sas_address"}, nil, + ), + sasPhyResetProblem: prometheus.NewDesc( + prometheus.BuildFQName(namespace, sasCollectorSubsystem, "phy_reset_problem"), + "SAS PHY count of reset problems.", + []string{"phy", "port", "host", "expander", "block_device", "sas_address"}, nil, + ), + sasRunningDisparityError: prometheus.NewDesc( + prometheus.BuildFQName(namespace, sasCollectorSubsystem, "phy_running_disparity_error"), + "SAS PHY count of running disparity errors.", + []string{"phy", "port", "host", "expander", "block_device", "sas_address"}, nil, + ), + logger: logger, + } + + return c, nil +} + +// Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/. +func (s *sasCollector) Update(ch chan<- prometheus.Metric) error { + sasEndDevices, err := s.fs.SASEndDeviceClass() + if err != nil { + return err + } + + sasExpanders, err := s.fs.SASExpanderClass() + if err != nil { + return err + } + + sasHosts, err := s.fs.SASHostClass() + if err != nil { + return err + } + + sasPhys, err := s.fs.SASPhyClass() + if err != nil { + return err + } + + sasPorts, err := s.fs.SASPortClass() + if err != nil { + return err + } + + for _, phy := range sasPhys { + // One per SAS link in the system. + phyName := phy.Name + portName := phy.SASPort + sasAddress := phy.SASAddress + hostName := "" + expanderName := "" + blockDeviceName := "" + + // Check to see if this Phy is connected directly to a SAS host (~controller card) + host := sasHosts.GetByPhy(phyName) + if host != nil { + hostName = host.Name + } + + expander := sasExpanders.GetByPhy(phyName) + if expander != nil { + expanderName = expander.Name + + // If we didn't find a host name before, then maybe it's connected + // host -> expander -> Phy. So try checking the expander's Ports? + // TODO: Do this recursively for nested expanders. + if hostName == "" { + port := sasPorts.GetByExpander(expanderName) + if port != nil { + host := sasHosts.GetByPort(port.Name) + if host != nil { + hostName = host.Name + } + } + } + + } + + // This is the best mapping that I've found for phy->blockDevice. Yes, it's horrible. + port := sasPorts.GetByName(portName) + if port != nil { + for _, portEndDevice := range port.EndDevices { + endDevice := sasEndDevices.GetByName(portEndDevice) + if endDevice != nil { + if len(endDevice.BlockDevices) > 0 { + blockDeviceName = endDevice.BlockDevices[0] + } + } + } + } + + ch <- prometheus.MustNewConstMetric(s.sasInvalidDword, + prometheus.CounterValue, + float64(phy.InvalidDwordCount), + phyName, + portName, + hostName, + expanderName, + blockDeviceName, + sasAddress) + ch <- prometheus.MustNewConstMetric(s.sasLossOfDwordSync, + prometheus.CounterValue, + float64(phy.LossOfDwordSyncCount), + phyName, + portName, + hostName, + expanderName, + blockDeviceName, + sasAddress) + ch <- prometheus.MustNewConstMetric(s.sasNegotiatedLinkrate, + prometheus.GaugeValue, + float64(phy.NegotiatedLinkrate), + phyName, + portName, + hostName, + expanderName, + blockDeviceName, + sasAddress) + ch <- prometheus.MustNewConstMetric(s.sasPhyResetProblem, + prometheus.CounterValue, + float64(phy.PhyResetProblemCount), + phyName, + portName, + hostName, + expanderName, + blockDeviceName, + sasAddress) + ch <- prometheus.MustNewConstMetric(s.sasRunningDisparityError, + prometheus.CounterValue, + float64(phy.RunningDisparityErrorCount), + phyName, + portName, + hostName, + expanderName, + blockDeviceName, + sasAddress) + } + return nil +}