From ba8c043079b38748e57adf1f80e3d86a4060efc5 Mon Sep 17 00:00:00 2001 From: Ben Kochie Date: Mon, 24 Oct 2022 09:30:32 +0100 Subject: [PATCH] Add procfs fallback to netdev collector (#2509) Some systems have broken netlink messages due to patched kernels. Since these messages cannot be parsed, add a flag to fall back to parsing from `/proc/net/dev`. Fixes: https://github.com/prometheus/node_exporter/issues/2502 Signed-off-by: Ben Kochie Signed-off-by: Ben Kochie --- collector/netdev_linux.go | 66 +++++++++++++++++++++++++++++++--- collector/netdev_linux_test.go | 10 +++--- 2 files changed, 67 insertions(+), 9 deletions(-) diff --git a/collector/netdev_linux.go b/collector/netdev_linux.go index 325d10b7..527d33e4 100644 --- a/collector/netdev_linux.go +++ b/collector/netdev_linux.go @@ -17,28 +17,42 @@ package collector import ( + "fmt" + "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/jsimonetti/rtnetlink" + "github.com/prometheus/procfs" + "gopkg.in/alecthomas/kingpin.v2" +) + +var ( + netDevNetlink = kingpin.Flag("collector.netdev.netlink", "Use netlink to gather stats instead of /proc/net/dev.").Default("true").Bool() ) func getNetDevStats(filter *deviceFilter, logger log.Logger) (netDevStats, error) { + if *netDevNetlink { + return netlinkStats(filter, logger) + } + return procNetDevStats(filter, logger) +} + +func netlinkStats(filter *deviceFilter, logger log.Logger) (netDevStats, error) { conn, err := rtnetlink.Dial(nil) if err != nil { return nil, err } - defer conn.Close() + defer conn.Close() links, err := conn.Link.List() if err != nil { return nil, err } - return netlinkStats(links, filter, logger), nil + return parseNetlinkStats(links, filter, logger), nil } -func netlinkStats(links []rtnetlink.LinkMessage, filter *deviceFilter, logger log.Logger) netDevStats { +func parseNetlinkStats(links []rtnetlink.LinkMessage, filter *deviceFilter, logger log.Logger) netDevStats { metrics := netDevStats{} for _, msg := range links { @@ -87,3 
+101,47 @@ func netlinkStats(links []rtnetlink.LinkMessage, filter *deviceFilter, logger lo return metrics } + +func procNetDevStats(filter *deviceFilter, logger log.Logger) (netDevStats, error) { + metrics := netDevStats{} + + fs, err := procfs.NewFS(*procPath) + if err != nil { + return metrics, fmt.Errorf("failed to open procfs: %w", err) + } + + netDev, err := fs.NetDev() + if err != nil { + return metrics, fmt.Errorf("failed to parse /proc/net/dev: %w", err) + } + + for _, stats := range netDev { + name := stats.Name + + if filter.ignored(name) { + level.Debug(logger).Log("msg", "Ignoring device", "device", name) + continue + } + + metrics[name] = map[string]uint64{ + "receive_bytes": stats.RxBytes, + "receive_packets": stats.RxPackets, + "receive_errors": stats.RxErrors, + "receive_dropped": stats.RxDropped, + "receive_fifo": stats.RxFIFO, + "receive_frame": stats.RxFrame, + "receive_compressed": stats.RxCompressed, + "receive_multicast": stats.RxMulticast, + "transmit_bytes": stats.TxBytes, + "transmit_packets": stats.TxPackets, + "transmit_errors": stats.TxErrors, + "transmit_dropped": stats.TxDropped, + "transmit_fifo": stats.TxFIFO, + "transmit_colls": stats.TxCollisions, + "transmit_carrier": stats.TxCarrier, + "transmit_compressed": stats.TxCompressed, + } + } + + return metrics, nil +} diff --git a/collector/netdev_linux_test.go b/collector/netdev_linux_test.go index 32e3d16b..ff5e1c22 100644 --- a/collector/netdev_linux_test.go +++ b/collector/netdev_linux_test.go @@ -163,7 +163,7 @@ var links = []rtnetlink.LinkMessage{ func TestNetDevStatsIgnore(t *testing.T) { filter := newDeviceFilter("^veth", "") - netStats := netlinkStats(links, &filter, log.NewNopLogger()) + netStats := parseNetlinkStats(links, &filter, log.NewNopLogger()) if want, got := uint64(10437182923), netStats["wlan0"]["receive_bytes"]; want != got { t.Errorf("want netstat wlan0 bytes %v, got %v", want, got) @@ -196,7 +196,7 @@ func TestNetDevStatsIgnore(t *testing.T) { func 
TestNetDevStatsAccept(t *testing.T) { filter := newDeviceFilter("", "^💩0$") - netStats := netlinkStats(links, &filter, log.NewNopLogger()) + netStats := parseNetlinkStats(links, &filter, log.NewNopLogger()) if want, got := 1, len(netStats); want != got { t.Errorf("want count of devices to be %d, got %d", want, got) @@ -227,7 +227,7 @@ func TestNetDevLegacyMetricNames(t *testing.T) { } filter := newDeviceFilter("", "") - netStats := netlinkStats(links, &filter, log.NewNopLogger()) + netStats := parseNetlinkStats(links, &filter, log.NewNopLogger()) for dev, devStats := range netStats { legacy(devStats) @@ -260,7 +260,7 @@ func TestNetDevLegacyMetricValues(t *testing.T) { } filter := newDeviceFilter("", "^enp0s0f0$") - netStats := netlinkStats(links, &filter, log.NewNopLogger()) + netStats := parseNetlinkStats(links, &filter, log.NewNopLogger()) metrics, ok := netStats["enp0s0f0"] if !ok { t.Error("expected stats for interface enp0s0f0") @@ -282,7 +282,7 @@ func TestNetDevLegacyMetricValues(t *testing.T) { func TestNetDevMetricValues(t *testing.T) { filter := newDeviceFilter("", "") - netStats := netlinkStats(links, &filter, log.NewNopLogger()) + netStats := parseNetlinkStats(links, &filter, log.NewNopLogger()) for _, msg := range links { device := msg.Attributes.Name