diff --git a/collector/nvme_linux.go b/collector/nvme_linux.go
index d1a9a87b..6be5dafe 100644
--- a/collector/nvme_linux.go
+++ b/collector/nvme_linux.go
@@ -21,14 +21,31 @@ import (
 	"fmt"
 	"log/slog"
 	"os"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
 
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/procfs/sysfs"
 )
 
+// nvmeNamespaceRE matches namespace directory names of the form
+// nvme<X>c<Y>n<Z> and captures the trailing namespace ID. It is compiled once
+// at package scope instead of once per device on every scrape.
+var nvmeNamespaceRE = regexp.MustCompile(`nvme[0-9]+c[0-9]+n([0-9]+)`)
+
+// nvmeCollector exports NVMe controller and namespace metrics read from the
+// class/nvme directory under the configured sysfs path.
 type nvmeCollector struct {
-	fs     sysfs.FS
-	logger *slog.Logger
+	fs                             sysfs.FS
+	logger                         *slog.Logger
+	namespaceInfo                  *prometheus.Desc
+	namespaceCapacityBytes         *prometheus.Desc
+	namespaceSizeBytes             *prometheus.Desc
+	namespaceUsedBytes             *prometheus.Desc
+	namespaceLogicalBlockSizeBytes *prometheus.Desc
+	info                           *prometheus.Desc
 }
 
 func init() {
@@ -42,9 +59,52 @@ func NewNVMeCollector(logger *slog.Logger) (Collector, error) {
 		return nil, fmt.Errorf("failed to open sysfs: %w", err)
 	}
 
+	// Build every descriptor once here so Update can reuse them on each call.
+	info := prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "nvme", "info"),
+		"Non-numeric data from /sys/class/nvme/<device>, value is always 1.",
+		[]string{"device", "firmware_revision", "model", "serial", "state", "cntlid"},
+		nil,
+	)
+	namespaceInfo := prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "nvme", "namespace_info"),
+		"Information about NVMe namespaces. Value is always 1",
+		[]string{"device", "nsid", "ana_state"}, nil,
+	)
+
+	namespaceCapacityBytes := prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "nvme", "namespace_capacity_bytes"),
+		"Capacity of the NVMe namespace in bytes. Computed as namespace_size * namespace_logical_block_size",
+		[]string{"device", "nsid"}, nil,
+	)
+
+	namespaceSizeBytes := prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "nvme", "namespace_size_bytes"),
+		"Size of the NVMe namespace in bytes. Computed as /sys/class/nvme/<device>/<namespace>/size * namespace_logical_block_size",
+		[]string{"device", "nsid"}, nil,
+	)
+
+	namespaceUsedBytes := prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "nvme", "namespace_used_bytes"),
+		"Used space of the NVMe namespace in bytes. Available in /sys/class/nvme/<device>/<namespace>/nuse",
+		[]string{"device", "nsid"}, nil,
+	)
+
+	namespaceLogicalBlockSizeBytes := prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "nvme", "namespace_logical_block_size_bytes"),
+		"Logical block size of the NVMe namespace in bytes. Usually 4Kb. Available in /sys/class/nvme/<device>/<namespace>/queue/logical_block_size",
+		[]string{"device", "nsid"}, nil,
+	)
+
 	return &nvmeCollector{
-		fs:     fs,
-		logger: logger,
+		fs:                             fs,
+		logger:                         logger,
+		namespaceInfo:                  namespaceInfo,
+		namespaceCapacityBytes:         namespaceCapacityBytes,
+		namespaceSizeBytes:             namespaceSizeBytes,
+		namespaceUsedBytes:             namespaceUsedBytes,
+		namespaceLogicalBlockSizeBytes: namespaceLogicalBlockSizeBytes,
+		info:                           info,
 	}, nil
 }
 
@@ -59,14 +119,107 @@ func (c *nvmeCollector) Update(ch chan<- prometheus.Metric) error {
 	}
 
 	for _, device := range devices {
-		infoDesc := prometheus.NewDesc(
-			prometheus.BuildFQName(namespace, "nvme", "info"),
-			"Non-numeric data from /sys/class/nvme/<device>, value is always 1.",
-			[]string{"device", "firmware_revision", "model", "serial", "state"},
-			nil,
-		)
 		infoValue := 1.0
-		ch <- prometheus.MustNewConstMetric(infoDesc, prometheus.GaugeValue, infoValue, device.Name, device.FirmwareRevision, device.Model, device.Serial, device.State)
+
+		devicePath := filepath.Join(*sysPath, "class/nvme", device.Name)
+
+		// Leave the cntlid label empty when the attribute is unreadable rather
+		// than reporting a fabricated controller ID of 0.
+		cntlid := ""
+		if id, err := readUintFromFile(filepath.Join(devicePath, "cntlid")); err != nil {
+			c.logger.Debug("failed to read cntlid", "device", device.Name, "err", err)
+		} else {
+			cntlid = strconv.FormatUint(id, 10)
+		}
+		ch <- prometheus.MustNewConstMetric(c.info, prometheus.GaugeValue, infoValue, device.Name, device.FirmwareRevision, device.Model, device.Serial, device.State, cntlid)
+
+		// Find namespace directories.
+		namespacePaths, err := filepath.Glob(filepath.Join(devicePath, "nvme[0-9]*c[0-9]*n[0-9]*"))
+		if err != nil {
+			c.logger.Error("failed to list NVMe namespaces", "device", device.Name, "err", err)
+			continue
+		}
+
+		for _, namespacePath := range namespacePaths {
+			// Read namespace data.
+			match := nvmeNamespaceRE.FindStringSubmatch(filepath.Base(namespacePath))
+			if len(match) == 0 {
+				continue
+			}
+			nsid := match[1]
+
+			anaState := "unknown"
+			anaStateSysfs, err := os.ReadFile(filepath.Join(namespacePath, "ana_state"))
+			if err == nil {
+				anaState = strings.TrimSpace(string(anaStateSysfs))
+			} else {
+				c.logger.Debug("failed to read ana_state", "device", device.Name, "namespace", match[0], "err", err)
+			}
+
+			ch <- prometheus.MustNewConstMetric(
+				c.namespaceInfo,
+				prometheus.GaugeValue,
+				1.0,
+				device.Name,
+				nsid,
+				anaState,
+			)
+
+			// Every byte-valued metric is derived from the logical block size,
+			// so skip them all when it cannot be read instead of exporting
+			// misleading zeros.
+			lbaSize, err := readUintFromFile(filepath.Join(namespacePath, "queue", "logical_block_size"))
+			if err != nil {
+				c.logger.Debug("failed to read queue/logical_block_size", "device", device.Name, "namespace", match[0], "err", err)
+				continue
+			}
+
+			ch <- prometheus.MustNewConstMetric(
+				c.namespaceLogicalBlockSizeBytes,
+				prometheus.GaugeValue,
+				float64(lbaSize),
+				device.Name,
+				nsid,
+			)
+
+			nsze, err := readUintFromFile(filepath.Join(namespacePath, "size"))
+			if err != nil {
+				c.logger.Debug("failed to read size", "device", device.Name, "namespace", match[0], "err", err)
+			} else {
+				// Scale the block count to bytes so the value matches the
+				// _bytes metric names.
+				// NOTE(review): assumes size is counted in logical blocks —
+				// confirm against the kernel sysfs ABI.
+				sizeBytes := float64(nsze * lbaSize)
+				ch <- prometheus.MustNewConstMetric(
+					c.namespaceCapacityBytes,
+					prometheus.GaugeValue,
+					sizeBytes,
+					device.Name,
+					nsid,
+				)
+				ch <- prometheus.MustNewConstMetric(
+					c.namespaceSizeBytes,
+					prometheus.GaugeValue,
+					sizeBytes,
+					device.Name,
+					nsid,
+				)
+			}
+
+			nuse, err := readUintFromFile(filepath.Join(namespacePath, "nuse"))
+			if err != nil {
+				c.logger.Debug("failed to read nuse", "device", device.Name, "namespace", match[0], "err", err)
+				continue
+			}
+			ch <- prometheus.MustNewConstMetric(
+				c.namespaceUsedBytes,
+				prometheus.GaugeValue,
+				float64(nuse*lbaSize),
+				device.Name,
+				nsid,
+			)
+		}
 	}
 
 	return nil