From 392f9accd6d180880533801f41115cf887e0a2fc Mon Sep 17 00:00:00 2001 From: Chris Cleeland Date: Thu, 18 Apr 2024 14:52:31 -0500 Subject: [PATCH 1/7] udp_queues_linux.go: Expose UDP drops via gauge analogous to queue sizes. Signed-off-by: Chris Cleeland --- collector/udp_queues_linux.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/collector/udp_queues_linux.go b/collector/udp_queues_linux.go index 2923936e..104fda39 100644 --- a/collector/udp_queues_linux.go +++ b/collector/udp_queues_linux.go @@ -61,6 +61,7 @@ func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error { if errIPv4 == nil { ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.TxQueueLength), "tx", "v4") ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.RxQueueLength), "rx", "v4") + ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(*s4.Drops), "drops", "v4") } else { if errors.Is(errIPv4, os.ErrNotExist) { c.logger.Debug("not collecting ipv4 based metrics") @@ -73,6 +74,7 @@ func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error { if errIPv6 == nil { ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.TxQueueLength), "tx", "v6") ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.RxQueueLength), "rx", "v6") + ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(*s6.Drops), "drops", "v6") } else { if errors.Is(errIPv6, os.ErrNotExist) { c.logger.Debug("not collecting ipv6 based metrics") From 619f013cfc97d88141be819b9d18efad467861bf Mon Sep 17 00:00:00 2001 From: Chris Cleeland Date: Mon, 22 Apr 2024 10:34:02 -0500 Subject: [PATCH 2/7] Fix failing e2e test. Update e2e test output to include the drops. 
Signed-off-by: Chris Cleeland --- collector/fixtures/e2e-64k-page-output.txt | 1 + collector/fixtures/e2e-output.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 55583bad..9e9d2960 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -3244,6 +3244,7 @@ node_time_clocksource_current_info{clocksource="tsc",device="0"} 1 # TYPE node_time_zone_offset_seconds gauge # HELP node_udp_queues Number of allocated memory in the kernel for UDP datagrams in bytes. # TYPE node_udp_queues gauge +node_udp_queues{ip="v4",queue="drops"} 100 node_udp_queues{ip="v4",queue="rx"} 0 node_udp_queues{ip="v4",queue="tx"} 21 # HELP node_vmstat_oom_kill /proc/vmstat information field oom_kill. diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index eb6be483..5294f274 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -3266,6 +3266,7 @@ node_time_clocksource_current_info{clocksource="tsc",device="0"} 1 # TYPE node_time_zone_offset_seconds gauge # HELP node_udp_queues Number of allocated memory in the kernel for UDP datagrams in bytes. # TYPE node_udp_queues gauge +node_udp_queues{ip="v4",queue="drops"} 100 node_udp_queues{ip="v4",queue="rx"} 0 node_udp_queues{ip="v4",queue="tx"} 21 # HELP node_vmstat_oom_kill /proc/vmstat information field oom_kill. From 5a060c27808634a38aace3ba3fe6c5c7d39710f0 Mon Sep 17 00:00:00 2001 From: Chris Cleeland Date: Mon, 22 Apr 2024 13:32:16 -0500 Subject: [PATCH 3/7] Fix failing docker test. Don't use a pointer without checking it first. 
Signed-off-by: Chris Cleeland --- collector/udp_queues_linux.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/collector/udp_queues_linux.go b/collector/udp_queues_linux.go index 104fda39..15beb907 100644 --- a/collector/udp_queues_linux.go +++ b/collector/udp_queues_linux.go @@ -61,7 +61,9 @@ func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error { if errIPv4 == nil { ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.TxQueueLength), "tx", "v4") ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.RxQueueLength), "rx", "v4") - ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(*s4.Drops), "drops", "v4") + if s4.Drops != nil { + ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(*s4.Drops), "drops", "v4") + } } else { if errors.Is(errIPv4, os.ErrNotExist) { c.logger.Debug("not collecting ipv4 based metrics") @@ -74,7 +76,9 @@ func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error { if errIPv6 == nil { ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.TxQueueLength), "tx", "v6") ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.RxQueueLength), "rx", "v6") - ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(*s6.Drops), "drops", "v6") + if s6.Drops != nil { + ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(*s6.Drops), "drops", "v6") + } } else { if errors.Is(errIPv6, os.ErrNotExist) { c.logger.Debug("not collecting ipv6 based metrics") From 1a67c6224acdb058571d045076d1cfe86ccf1d19 Mon Sep 17 00:00:00 2001 From: Chris Cleeland Date: Tue, 23 Apr 2024 11:22:29 -0500 Subject: [PATCH 4/7] UDP drops should be a counter, not a gauge. I think it makes sense for "drops" to sit alongside the queue length gauges in prom stats because they are all neighbors in procfs (source of these stats). 
Moreover, after reading the commit log message of the change that originally introduced udp_queues, I think there may have been some misreading or confusion between the word "state" and "stats", the common short form of "statistics". The original author "chose the name 'udp_queue' instead of 'udpstat' as UDP has no state"; I believe that 'udpstat' might actually be the more appropriate name. Signed-off-by: Chris Cleeland --- collector/udp_queues_linux.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/collector/udp_queues_linux.go b/collector/udp_queues_linux.go index 15beb907..8d7cd690 100644 --- a/collector/udp_queues_linux.go +++ b/collector/udp_queues_linux.go @@ -62,7 +62,7 @@ func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.TxQueueLength), "tx", "v4") ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.RxQueueLength), "rx", "v4") if s4.Drops != nil { - ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(*s4.Drops), "drops", "v4") + ch <- prometheus.MustNewConstMetric(c.desc, prometheus.CounterValue, float64(*s4.Drops), "drops", "v4") } } else { if errors.Is(errIPv4, os.ErrNotExist) { @@ -77,7 +77,7 @@ func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.TxQueueLength), "tx", "v6") ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.RxQueueLength), "rx", "v6") if s6.Drops != nil { - ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(*s6.Drops), "drops", "v6") + ch <- prometheus.MustNewConstMetric(c.desc, prometheus.CounterValue, float64(*s6.Drops), "drops", "v6") } } else { if errors.Is(errIPv6, os.ErrNotExist) { From 146d5e8e680cd1e60f402f10ed57bae17c8c712c Mon Sep 17 00:00:00 2001 From: Chris Cleeland Date: Mon, 6 Jan 2025 12:39:23 -0600 
Subject: [PATCH 5/7] Update e2e test expectations. Signed-off-by: Chris Cleeland --- collector/fixtures/e2e-64k-page-output.txt | 2 +- collector/fixtures/e2e-output.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 9e9d2960..7f4b17d2 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -3243,7 +3243,7 @@ node_time_clocksource_current_info{clocksource="tsc",device="0"} 1 # HELP node_time_zone_offset_seconds System time zone offset in seconds. # TYPE node_time_zone_offset_seconds gauge # HELP node_udp_queues Number of allocated memory in the kernel for UDP datagrams in bytes. -# TYPE node_udp_queues gauge +# TYPE node_udp_queues counter node_udp_queues{ip="v4",queue="drops"} 100 node_udp_queues{ip="v4",queue="rx"} 0 node_udp_queues{ip="v4",queue="tx"} 21 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 5294f274..6dc8e7a8 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -3265,7 +3265,7 @@ node_time_clocksource_current_info{clocksource="tsc",device="0"} 1 # HELP node_time_zone_offset_seconds System time zone offset in seconds. # TYPE node_time_zone_offset_seconds gauge # HELP node_udp_queues Number of allocated memory in the kernel for UDP datagrams in bytes. -# TYPE node_udp_queues gauge +# TYPE node_udp_queues counter node_udp_queues{ip="v4",queue="drops"} 100 node_udp_queues{ip="v4",queue="rx"} 0 node_udp_queues{ip="v4",queue="tx"} 21 From 62b30d638f878f194e43b05c386cc00b872a4557 Mon Sep 17 00:00:00 2001 From: Chris Cleeland Date: Mon, 6 Jan 2025 15:59:27 -0600 Subject: [PATCH 6/7] Fix yamllint error due to trailing whitespace. 
Signed-off-by: Chris Cleeland --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 96b38cff..5122a8a4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -57,7 +57,7 @@ jobs: fi if [[ -f "$(pwd)/.build/darwin-amd64/node_exporter" ]]; then - promu codesign "$(pwd)/.build/darwin-amd64/node_exporter" + promu codesign "$(pwd)/.build/darwin-amd64/node_exporter" fi - persist_to_workspace: root: . From 13f019f82272bb3b287a67b42ac788e7f214c9a3 Mon Sep 17 00:00:00 2001 From: Chris Cleeland Date: Mon, 6 Jan 2025 23:05:45 -0600 Subject: [PATCH 7/7] Break udp_queues into udp_{queues,drops_total} This separates the gauges related to udp_queues and the counter related to the total number of drops, but keeps them under the same namespace. Signed-off-by: Chris Cleeland --- README.md | 2 +- collector/fixtures/e2e-64k-page-output.txt | 8 ++++-- collector/fixtures/e2e-output.txt | 8 ++++-- collector/udp_queues_linux.go | 32 +++++++++++++--------- end-to-end-test.sh | 2 +- 5 files changed, 31 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index ff5d27ea..31ee0528 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,7 @@ thermal | Exposes thermal statistics like `pmset -g therm`. | Darwin thermal\_zone | Exposes thermal zone & cooling device statistics from `/sys/class/thermal`. | Linux time | Exposes the current system time. | _any_ timex | Exposes selected adjtimex(2) system call stats. | Linux -udp_queues | Exposes UDP total lengths of the rx_queue and tx_queue from `/proc/net/udp` and `/proc/net/udp6`. | Linux +udp | Exposes UDP statistics from `/proc/net/udp` and `/proc/net/udp6`. | Linux uname | Exposes system information as provided by the uname system call. | Darwin, FreeBSD, Linux, OpenBSD vmstat | Exposes statistics from `/proc/vmstat`. 
| Linux watchdog | Exposes statistics from `/sys/class/watchdog` | Linux diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 7f4b17d2..d0a57cf3 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -2991,7 +2991,7 @@ node_scrape_collector_success{collector="tapestats"} 1 node_scrape_collector_success{collector="textfile"} 1 node_scrape_collector_success{collector="thermal_zone"} 1 node_scrape_collector_success{collector="time"} 1 -node_scrape_collector_success{collector="udp_queues"} 1 +node_scrape_collector_success{collector="udp"} 1 node_scrape_collector_success{collector="vmstat"} 1 node_scrape_collector_success{collector="watchdog"} 1 node_scrape_collector_success{collector="wifi"} 1 @@ -3242,9 +3242,11 @@ node_time_clocksource_current_info{clocksource="tsc",device="0"} 1 # TYPE node_time_seconds gauge # HELP node_time_zone_offset_seconds System time zone offset in seconds. # TYPE node_time_zone_offset_seconds gauge +# HELP node_udp_drops_total Total number of datagrams dropped. +# TYPE node_udp_drops_total counter +node_udp_drops_total{ip="v4"} 100 # HELP node_udp_queues Number of allocated memory in the kernel for UDP datagrams in bytes. -# TYPE node_udp_queues counter -node_udp_queues{ip="v4",queue="drops"} 100 +# TYPE node_udp_queues gauge node_udp_queues{ip="v4",queue="rx"} 0 node_udp_queues{ip="v4",queue="tx"} 21 # HELP node_vmstat_oom_kill /proc/vmstat information field oom_kill. 
diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 6dc8e7a8..bcec4f26 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -3013,7 +3013,7 @@ node_scrape_collector_success{collector="tapestats"} 1 node_scrape_collector_success{collector="textfile"} 1 node_scrape_collector_success{collector="thermal_zone"} 1 node_scrape_collector_success{collector="time"} 1 -node_scrape_collector_success{collector="udp_queues"} 1 +node_scrape_collector_success{collector="udp"} 1 node_scrape_collector_success{collector="vmstat"} 1 node_scrape_collector_success{collector="watchdog"} 1 node_scrape_collector_success{collector="wifi"} 1 @@ -3264,9 +3264,11 @@ node_time_clocksource_current_info{clocksource="tsc",device="0"} 1 # TYPE node_time_seconds gauge # HELP node_time_zone_offset_seconds System time zone offset in seconds. # TYPE node_time_zone_offset_seconds gauge +# HELP node_udp_drops_total Total number of datagrams dropped. +# TYPE node_udp_drops_total counter +node_udp_drops_total{ip="v4"} 100 # HELP node_udp_queues Number of allocated memory in the kernel for UDP datagrams in bytes. -# TYPE node_udp_queues counter -node_udp_queues{ip="v4",queue="drops"} 100 +# TYPE node_udp_queues gauge node_udp_queues{ip="v4",queue="rx"} 0 node_udp_queues{ip="v4",queue="tx"} 21 # HELP node_vmstat_oom_kill /proc/vmstat information field oom_kill. 
diff --git a/collector/udp_queues_linux.go b/collector/udp_queues_linux.go index 8d7cd690..c9f4eb9b 100644 --- a/collector/udp_queues_linux.go +++ b/collector/udp_queues_linux.go @@ -27,42 +27,48 @@ import ( ) type ( - udpQueuesCollector struct { + udpCollector struct { fs procfs.FS - desc *prometheus.Desc + queues *prometheus.Desc + drops *prometheus.Desc logger *slog.Logger } ) func init() { - registerCollector("udp_queues", defaultEnabled, NewUDPqueuesCollector) + registerCollector("udp", defaultEnabled, NewUDPCollector) } -// NewUDPqueuesCollector returns a new Collector exposing network udp queued bytes. -func NewUDPqueuesCollector(logger *slog.Logger) (Collector, error) { +// NewUDPCollector returns a new Collector exposing UDP queue lengths and dropped-datagram counts. +func NewUDPCollector(logger *slog.Logger) (Collector, error) { fs, err := procfs.NewFS(*procPath) if err != nil { return nil, fmt.Errorf("failed to open procfs: %w", err) } - return &udpQueuesCollector{ + return &udpCollector{ fs: fs, - desc: prometheus.NewDesc( + queues: prometheus.NewDesc( prometheus.BuildFQName(namespace, "udp", "queues"), "Number of allocated memory in the kernel for UDP datagrams in bytes.", []string{"queue", "ip"}, nil, ), + drops: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "udp", "drops_total"), + "Total number of datagrams dropped.", + []string{"ip"}, nil, + ), logger: logger, }, nil } -func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error { +func (c *udpCollector) Update(ch chan<- prometheus.Metric) error { s4, errIPv4 := c.fs.NetUDPSummary() if errIPv4 == nil { - ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.TxQueueLength), "tx", "v4") - ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.RxQueueLength), "rx", "v4") + ch <- prometheus.MustNewConstMetric(c.queues, prometheus.GaugeValue, float64(s4.TxQueueLength), "tx", "v4") + ch <- prometheus.MustNewConstMetric(c.queues, prometheus.GaugeValue, float64(s4.RxQueueLength), "rx", "v4") if s4.Drops != nil { - ch <- 
prometheus.MustNewConstMetric(c.desc, prometheus.CounterValue, float64(*s4.Drops), "drops", "v4") + ch <- prometheus.MustNewConstMetric(c.drops, prometheus.CounterValue, float64(*s4.Drops), "v4") } } else { if errors.Is(errIPv4, os.ErrNotExist) { @@ -74,10 +80,10 @@ func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error { s6, errIPv6 := c.fs.NetUDP6Summary() if errIPv6 == nil { - ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.TxQueueLength), "tx", "v6") - ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.RxQueueLength), "rx", "v6") + ch <- prometheus.MustNewConstMetric(c.queues, prometheus.GaugeValue, float64(s6.TxQueueLength), "tx", "v6") + ch <- prometheus.MustNewConstMetric(c.queues, prometheus.GaugeValue, float64(s6.RxQueueLength), "rx", "v6") if s6.Drops != nil { - ch <- prometheus.MustNewConstMetric(c.desc, prometheus.CounterValue, float64(*s6.Drops), "drops", "v6") + ch <- prometheus.MustNewConstMetric(c.drops, prometheus.CounterValue, float64(*s6.Drops), "v6") } } else { if errors.Is(errIPv6, os.ErrNotExist) { diff --git a/end-to-end-test.sh b/end-to-end-test.sh index 35e4534e..0816951d 100755 --- a/end-to-end-test.sh +++ b/end-to-end-test.sh @@ -47,7 +47,7 @@ enabled_collectors=$(cat << COLLECTORS sysctl textfile thermal_zone - udp_queues + udp vmstat watchdog wifi