From e81cfcca0d43af3ef4f0bd9cfcab2599f3b3976a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 30 Mar 2025 12:35:16 +0300 Subject: [PATCH] mountstats/linux: fix multiple transports statistics labeling When mounting nfs with nconnect, mountstats will show multiple transports. Currently the mountstats exporter fails to create metrics from this information due to repeated entries. Fix this by adding transport metrics a "transport" label enumerating their order in the mountstats output. Signed-off-by: Sagi Grimberg --- collector/fixtures/e2e-64k-page-output.txt | 40 +++++++++--------- collector/fixtures/e2e-output.txt | 40 +++++++++--------- collector/mountstats_linux.go | 48 ++++++++++++---------- 3 files changed, 66 insertions(+), 62 deletions(-) diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 107d713b..f5dfdb17 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -2267,44 +2267,44 @@ node_mountstats_nfs_total_write_bytes_total{export="192.168.1.1:/srv/test",mount node_mountstats_nfs_total_write_bytes_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 # HELP node_mountstats_nfs_transport_backlog_queue_total Total number of items added to the RPC backlog queue. # TYPE node_mountstats_nfs_transport_backlog_queue_total counter -node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 -node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 0 +node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_bad_transaction_ids_total Number of times the NFS server sent a response with a transaction ID unknown to this client. # TYPE node_mountstats_nfs_transport_bad_transaction_ids_total counter -node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 -node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 0 +node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_bind_total Number of times the client has had to establish a connection from scratch to the NFS server. # TYPE node_mountstats_nfs_transport_bind_total counter -node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 -node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 0 +node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_connect_total Number of times the client has made a TCP connection to the NFS server. # TYPE node_mountstats_nfs_transport_connect_total counter -node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 1 -node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 1 +node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_idle_time_seconds Duration since the NFS mount last saw any RPC traffic, in seconds. # TYPE node_mountstats_nfs_transport_idle_time_seconds gauge -node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 11 -node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 11 +node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_maximum_rpc_slots Maximum number of simultaneously active RPC requests ever used. # TYPE node_mountstats_nfs_transport_maximum_rpc_slots gauge -node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 24 -node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 24 +node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 24 +node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 24 # HELP node_mountstats_nfs_transport_pending_queue_total Total number of items added to the RPC transmission pending queue. # TYPE node_mountstats_nfs_transport_pending_queue_total counter -node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 5726 -node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 5726 +node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 5726 +node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 5726 # HELP node_mountstats_nfs_transport_receives_total Number of RPC responses for this mount received from the NFS server. # TYPE node_mountstats_nfs_transport_receives_total counter -node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 6428 -node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 6428 +node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 6428 +node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 6428 # HELP node_mountstats_nfs_transport_sending_queue_total Total number of items added to the RPC transmission sending queue. # TYPE node_mountstats_nfs_transport_sending_queue_total counter -node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 26 -node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 26 +node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 26 +node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 26 # HELP node_mountstats_nfs_transport_sends_total Number of RPC requests for this mount sent to the NFS server. # TYPE node_mountstats_nfs_transport_sends_total counter -node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 6428 -node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 6428 +node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 6428 +node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 6428 # HELP node_mountstats_nfs_write_bytes_total Number of bytes written using the write() syscall. # TYPE node_mountstats_nfs_write_bytes_total counter node_mountstats_nfs_write_bytes_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 45938a2b..dca8d483 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -2289,44 +2289,44 @@ node_mountstats_nfs_total_write_bytes_total{export="192.168.1.1:/srv/test",mount node_mountstats_nfs_total_write_bytes_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 # HELP node_mountstats_nfs_transport_backlog_queue_total Total number of items added to the RPC backlog queue. # TYPE node_mountstats_nfs_transport_backlog_queue_total counter -node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 -node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 0 +node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_bad_transaction_ids_total Number of times the NFS server sent a response with a transaction ID unknown to this client. # TYPE node_mountstats_nfs_transport_bad_transaction_ids_total counter -node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 -node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 0 +node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_bind_total Number of times the client has had to establish a connection from scratch to the NFS server. # TYPE node_mountstats_nfs_transport_bind_total counter -node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 -node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 0 +node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_connect_total Number of times the client has made a TCP connection to the NFS server. # TYPE node_mountstats_nfs_transport_connect_total counter -node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 1 -node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 1 +node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_idle_time_seconds Duration since the NFS mount last saw any RPC traffic, in seconds. # TYPE node_mountstats_nfs_transport_idle_time_seconds gauge -node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 11 -node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 11 +node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_maximum_rpc_slots Maximum number of simultaneously active RPC requests ever used. # TYPE node_mountstats_nfs_transport_maximum_rpc_slots gauge -node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 24 -node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 24 +node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 24 +node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 24 # HELP node_mountstats_nfs_transport_pending_queue_total Total number of items added to the RPC transmission pending queue. # TYPE node_mountstats_nfs_transport_pending_queue_total counter -node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 5726 -node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 5726 +node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 5726 +node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 5726 # HELP node_mountstats_nfs_transport_receives_total Number of RPC responses for this mount received from the NFS server. # TYPE node_mountstats_nfs_transport_receives_total counter -node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 6428 -node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 6428 +node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 6428 +node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 6428 # HELP node_mountstats_nfs_transport_sending_queue_total Total number of items added to the RPC transmission sending queue. # TYPE node_mountstats_nfs_transport_sending_queue_total counter -node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 26 -node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 26 +node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 26 +node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 26 # HELP node_mountstats_nfs_transport_sends_total Number of RPC requests for this mount sent to the NFS server. # TYPE node_mountstats_nfs_transport_sends_total counter -node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 6428 -node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 6428 +node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 6428 +node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 6428 # HELP node_mountstats_nfs_write_bytes_total Number of bytes written using the write() syscall. # TYPE node_mountstats_nfs_write_bytes_total counter node_mountstats_nfs_write_bytes_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 diff --git a/collector/mountstats_linux.go b/collector/mountstats_linux.go index 03f1a9b6..2c79cfa2 100644 --- a/collector/mountstats_linux.go +++ b/collector/mountstats_linux.go @@ -19,6 +19,7 @@ package collector import ( "fmt" "log/slog" + "strconv" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs" @@ -127,8 +128,9 @@ func NewMountStatsCollector(logger *slog.Logger) (Collector, error) { ) var ( - labels = []string{"export", "protocol", "mountaddr"} - opLabels = []string{"export", "protocol", "mountaddr", "operation"} + labels = []string{"export", "protocol", "mountaddr"} + opLabels = []string{"export", "protocol", "mountaddr", "operation"} + translabels = []string{"export", "protocol", "mountaddr", "transport"} ) return &mountStatsCollector{ @@ -198,70 +200,70 @@ func NewMountStatsCollector(logger *slog.Logger) (Collector, error) { NFSTransportBindTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_bind_total"), "Number of times the client has had to establish a connection from scratch to the NFS server.", - labels, + translabels, nil, ), NFSTransportConnectTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_connect_total"), "Number of times the client has made a TCP connection to the NFS server.", - labels, + translabels, nil, ), NFSTransportIdleTimeSeconds: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_idle_time_seconds"), "Duration since the NFS mount last saw any RPC traffic, in seconds.", - labels, + translabels, nil, ), NFSTransportSendsTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_sends_total"), "Number of RPC requests for this mount sent to the NFS server.", - labels, + translabels, nil, ), NFSTransportReceivesTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_receives_total"), "Number of RPC responses for this mount received from the NFS server.", - labels, + translabels, nil, ), NFSTransportBadTransactionIDsTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_bad_transaction_ids_total"), "Number of times the NFS server sent a response with a transaction ID unknown to this client.", - labels, + translabels, nil, ), NFSTransportBacklogQueueTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_backlog_queue_total"), "Total number of items added to the RPC backlog queue.", - labels, + translabels, nil, ), NFSTransportMaximumRPCSlots: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_maximum_rpc_slots"), "Maximum number of simultaneously active RPC requests ever used.", - labels, + translabels, nil, ), NFSTransportSendingQueueTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_sending_queue_total"), "Total number of items added to the RPC transmission sending queue.", - labels, + translabels, nil, ), NFSTransportPendingQueueTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_pending_queue_total"), "Total number of items added to the RPC transmission pending queue.", - labels, + translabels, nil, ), @@ -618,74 +620,76 @@ func (c *mountStatsCollector) updateNFSStats(ch chan<- prometheus.Metric, s *pro ) for i := range s.Transport { + translabelValues := []string{export, protocol, mountAddress, strconv.Itoa(i)} + ch <- prometheus.MustNewConstMetric( c.NFSTransportBindTotal, prometheus.CounterValue, float64(s.Transport[i].Bind), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportConnectTotal, prometheus.CounterValue, float64(s.Transport[i].Connect), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportIdleTimeSeconds, prometheus.GaugeValue, float64(s.Transport[i].IdleTimeSeconds%float64Mantissa), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportSendsTotal, prometheus.CounterValue, float64(s.Transport[i].Sends), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportReceivesTotal, prometheus.CounterValue, float64(s.Transport[i].Receives), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportBadTransactionIDsTotal, prometheus.CounterValue, float64(s.Transport[i].BadTransactionIDs), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportBacklogQueueTotal, prometheus.CounterValue, float64(s.Transport[i].CumulativeBacklog), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportMaximumRPCSlots, prometheus.GaugeValue, float64(s.Transport[i].MaximumRPCSlotsUsed), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportSendingQueueTotal, prometheus.CounterValue, float64(s.Transport[i].CumulativeSendingQueue), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportPendingQueueTotal, prometheus.CounterValue, float64(s.Transport[i].CumulativePendingQueue), - labelValues..., + translabelValues..., ) }