From 53848dde0675c44f024bdf485579cf833f6f812d Mon Sep 17 00:00:00 2001 From: Shashwat Hiregoudar Date: Wed, 9 Jul 2025 11:46:35 +0530 Subject: [PATCH 1/2] adding nvme namespace parameters Signed-off-by: Shashwat Hiregoudar --- collector/fixtures/e2e-64k-page-output.txt | 2 +- collector/fixtures/e2e-output.txt | 2 +- collector/nvme_linux.go | 148 +++++++++++++++++++-- 3 files changed, 139 insertions(+), 13 deletions(-) diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 107d713b..66b321a5 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -2825,7 +2825,7 @@ node_nfsd_server_rpcs_total 18628 node_nfsd_server_threads 8 # HELP node_nvme_info Non-numeric data from /sys/class/nvme/, value is always 1. # TYPE node_nvme_info gauge -node_nvme_info{device="nvme0",firmware_revision="1B2QEXP7",model="Samsung SSD 970 PRO 512GB",serial="S680HF8N190894I",state="live"} 1 +node_nvme_info{cntlid="1997",device="nvme0",firmware_revision="1B2QEXP7",model="Samsung SSD 970 PRO 512GB",serial="S680HF8N190894I",state="live"} 1 # HELP node_os_info A metric with a constant '1' value labeled by build_id, id, id_like, image_id, image_version, name, pretty_name, variant, variant_id, version, version_codename, version_id. # TYPE node_os_info gauge node_os_info{build_id="",id="ubuntu",id_like="debian",image_id="",image_version="",name="Ubuntu",pretty_name="Ubuntu 20.04.2 LTS",variant="",variant_id="",version="20.04.2 LTS (Focal Fossa)",version_codename="focal",version_id="20.04"} 1 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 45938a2b..ce555fc8 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -2847,7 +2847,7 @@ node_nfsd_server_rpcs_total 18628 node_nfsd_server_threads 8 # HELP node_nvme_info Non-numeric data from /sys/class/nvme/, value is always 1. # TYPE node_nvme_info gauge -node_nvme_info{device="nvme0",firmware_revision="1B2QEXP7",model="Samsung SSD 970 PRO 512GB",serial="S680HF8N190894I",state="live"} 1 +node_nvme_info{cntlid="1997",device="nvme0",firmware_revision="1B2QEXP7",model="Samsung SSD 970 PRO 512GB",serial="S680HF8N190894I",state="live"} 1 # HELP node_os_info A metric with a constant '1' value labeled by build_id, id, id_like, image_id, image_version, name, pretty_name, variant, variant_id, version, version_codename, version_id. # TYPE node_os_info gauge node_os_info{build_id="",id="ubuntu",id_like="debian",image_id="",image_version="",name="Ubuntu",pretty_name="Ubuntu 20.04.2 LTS",variant="",variant_id="",version="20.04.2 LTS (Focal Fossa)",version_codename="focal",version_id="20.04"} 1 diff --git a/collector/nvme_linux.go b/collector/nvme_linux.go index d1a9a87b..88218e5c 100644 --- a/collector/nvme_linux.go +++ b/collector/nvme_linux.go @@ -21,14 +21,23 @@ import ( "fmt" "log/slog" "os" + "path/filepath" + "regexp" + "strings" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs/sysfs" ) type nvmeCollector struct { - fs sysfs.FS - logger *slog.Logger + fs sysfs.FS + logger *slog.Logger + namespaceInfo *prometheus.Desc + namespaceCapacityBytes *prometheus.Desc + namespaceSizeBytes *prometheus.Desc + namespaceUsedBytes *prometheus.Desc + namespaceLogicalBlockSizeBytes *prometheus.Desc + info *prometheus.Desc } func init() { @@ -42,9 +51,51 @@ func NewNVMeCollector(logger *slog.Logger) (Collector, error) { return nil, fmt.Errorf("failed to open sysfs: %w", err) } + info := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "nvme", "info"), + "Non-numeric data from /sys/class/nvme/, value is always 1.", + []string{"device", "firmware_revision", "model", "serial", "state", "cntlid"}, + nil, + ) + namespaceInfo := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "nvme", "namespace_info"), + "Information about NVMe namespaces. Value is always 1", + []string{"device", "nsid", "ana_state"}, nil, + ) + + namespaceCapacityBytes := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "nvme", "namespace_capacity_bytes"), + "Capacity of the NVMe namespace in bytes. Computed as namespace_size * namespace_logical_block_size", + []string{"device", "nsid"}, nil, + ) + + namespaceSizeBytes := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "nvme", "namespace_size_bytes"), + "Size of the NVMe namespace in bytes. Available in /sys/class/nvme///size", + []string{"device", "nsid"}, nil, + ) + + namespaceUsedBytes := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "nvme", "namespace_used_bytes"), + "Used space of the NVMe namespace in bytes. Available in /sys/class/nvme///nuse", + []string{"device", "nsid"}, nil, + ) + + namespaceLogicalBlockSizeBytes := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "nvme", "namespace_logical_block_size_bytes"), + "Logical block size of the NVMe namespace in bytes. Usually 4Kb. Available in /sys/class/nvme///queue/logical_block_size", + []string{"device", "nsid"}, nil, + ) + return &nvmeCollector{ - fs: fs, - logger: logger, + fs: fs, + logger: logger, + namespaceInfo: namespaceInfo, + namespaceCapacityBytes: namespaceCapacityBytes, + namespaceSizeBytes: namespaceSizeBytes, + namespaceUsedBytes: namespaceUsedBytes, + namespaceLogicalBlockSizeBytes: namespaceLogicalBlockSizeBytes, + info: info, }, nil } @@ -59,15 +110,90 @@ func (c *nvmeCollector) Update(ch chan<- prometheus.Metric) error { } for _, device := range devices { - infoDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "nvme", "info"), - "Non-numeric data from /sys/class/nvme/, value is always 1.", - []string{"device", "firmware_revision", "model", "serial", "state"}, - nil, - ) infoValue := 1.0 - ch <- prometheus.MustNewConstMetric(infoDesc, prometheus.GaugeValue, infoValue, device.Name, device.FirmwareRevision, device.Model, device.Serial, device.State) + + devicePath := filepath.Join(*sysPath, "class/nvme", device.Name) + ch <- prometheus.MustNewConstMetric(c.info, prometheus.GaugeValue, infoValue, device.Name, device.FirmwareRevision, device.Model, device.Serial, device.State, device.ControllerID) + // Find namespace directories. + namespacePaths, err := filepath.Glob(filepath.Join(devicePath, "nvme[0-9]*c[0-9]*n[0-9]*")) + if err != nil { + c.logger.Error("failed to list NVMe namespaces", "device", device.Name, "err", err) + continue + } + re := regexp.MustCompile(`nvme[0-9]+c[0-9]+n([0-9]+)`) + + for _, namespacePath := range namespacePaths { + + // Read namespace data. + match := re.FindStringSubmatch(filepath.Base(namespacePath)) + if len(match) == 0 { + continue + } + nsid := match[1] + nuse, err := readUintFromFile(filepath.Join(namespacePath, "nuse")) + if err != nil { + c.logger.Debug("failed to read nuse", "device", device.Name, "namespace", match[0], "err", err) + } + nsze, err := readUintFromFile(filepath.Join(namespacePath, "size")) + if err != nil { + c.logger.Debug("failed to read size", "device", device.Name, "namespace", match[0], "err", err) + } + lbaSize, err := readUintFromFile(filepath.Join(namespacePath, "queue", "logical_block_size")) + if err != nil { + c.logger.Debug("failed to read queue/logical_block_size", "device", device.Name, "namespace", match[0], "err", err) + } + ncap := nsze * lbaSize + anaState := "unknown" + anaStateSysfs, err := os.ReadFile(filepath.Join(namespacePath, "ana_state")) + if err == nil { + anaState = strings.TrimSpace(string(anaStateSysfs)) + } else { + c.logger.Debug("failed to read ana_state", "device", device.Name, "namespace", match[0], "err", err) + } + + ch <- prometheus.MustNewConstMetric( + c.namespaceInfo, + prometheus.GaugeValue, + 1.0, + device.Name, + nsid, + anaState, + ) + + ch <- prometheus.MustNewConstMetric( + c.namespaceCapacityBytes, + prometheus.GaugeValue, + float64(ncap), + device.Name, + nsid, + ) + + ch <- prometheus.MustNewConstMetric( + c.namespaceSizeBytes, + prometheus.GaugeValue, + float64(nsze), + device.Name, + nsid, + ) + + ch <- prometheus.MustNewConstMetric( + c.namespaceUsedBytes, + prometheus.GaugeValue, + float64(nuse*lbaSize), + device.Name, + nsid, + ) + + ch <- prometheus.MustNewConstMetric( + c.namespaceLogicalBlockSizeBytes, + prometheus.GaugeValue, + float64(lbaSize), + device.Name, + nsid, + ) + } } return nil } + From e252d97c6f2534ed24f7e9e871764245d137124b Mon Sep 17 00:00:00 2001 From: Shashwat Hiregoudar Date: Wed, 9 Jul 2025 11:53:27 +0530 Subject: [PATCH 2/2] linting the code Signed-off-by: Shashwat Hiregoudar --- collector/nvme_linux.go | 1 - 1 file changed, 1 deletion(-) diff --git a/collector/nvme_linux.go b/collector/nvme_linux.go index 88218e5c..04af7276 100644 --- a/collector/nvme_linux.go +++ b/collector/nvme_linux.go @@ -196,4 +196,3 @@ func (c *nvmeCollector) Update(ch chan<- prometheus.Metric) error { return nil } -