Add support for legacy InfiniBand drivers

Older versions of the OFED drivers expose 64-bit variants of the port counters in a directory named 'counters_ext'. This patch adds support for these older metrics, which were deprecated as of OFED 4.0.

Signed-Off-By: Robert Clark <robert.d.clark@hpe.com>
This commit is contained in:
Robert Clark 2017-03-09 16:10:36 -06:00
parent 8529cd3359
commit 3a5917dfdc
18 changed files with 91 additions and 2 deletions

View file

@ -689,6 +689,38 @@ node_hwmon_temp_max_celsius{chip="platform_coretemp_1",sensor="temp2"} 84
node_hwmon_temp_max_celsius{chip="platform_coretemp_1",sensor="temp3"} 84
node_hwmon_temp_max_celsius{chip="platform_coretemp_1",sensor="temp4"} 84
node_hwmon_temp_max_celsius{chip="platform_coretemp_1",sensor="temp5"} 84
# HELP node_infiniband_legacy_data_received_bytes_total Number of data octets received on all links
# TYPE node_infiniband_legacy_data_received_bytes_total counter
node_infiniband_legacy_data_received_bytes_total{device="mlx4_0",port="1"} 4.631917e+06
node_infiniband_legacy_data_received_bytes_total{device="mlx4_0",port="2"} 4.631917e+06
# HELP node_infiniband_legacy_data_transmitted_bytes_total Number of data octets transmitted on all links
# TYPE node_infiniband_legacy_data_transmitted_bytes_total counter
node_infiniband_legacy_data_transmitted_bytes_total{device="mlx4_0",port="1"} 3.73344e+06
node_infiniband_legacy_data_transmitted_bytes_total{device="mlx4_0",port="2"} 3.73344e+06
# HELP node_infiniband_legacy_multicast_packets_received_total Number of multicast packets received
# TYPE node_infiniband_legacy_multicast_packets_received_total counter
node_infiniband_legacy_multicast_packets_received_total{device="mlx4_0",port="1"} 93
node_infiniband_legacy_multicast_packets_received_total{device="mlx4_0",port="2"} 93
# HELP node_infiniband_legacy_multicast_packets_transmitted_total Number of multicast packets transmitted
# TYPE node_infiniband_legacy_multicast_packets_transmitted_total counter
node_infiniband_legacy_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16
node_infiniband_legacy_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 16
# HELP node_infiniband_legacy_packets_received_total Number of data packets received on all links
# TYPE node_infiniband_legacy_packets_received_total counter
node_infiniband_legacy_packets_received_total{device="mlx4_0",port="1"} 0
node_infiniband_legacy_packets_received_total{device="mlx4_0",port="2"} 0
# HELP node_infiniband_legacy_packets_transmitted_total Number of data packets received on all links
# TYPE node_infiniband_legacy_packets_transmitted_total counter
node_infiniband_legacy_packets_transmitted_total{device="mlx4_0",port="1"} 0
node_infiniband_legacy_packets_transmitted_total{device="mlx4_0",port="2"} 0
# HELP node_infiniband_legacy_unicast_packets_received_total Number of unicast packets received
# TYPE node_infiniband_legacy_unicast_packets_received_total counter
node_infiniband_legacy_unicast_packets_received_total{device="mlx4_0",port="1"} 61148
node_infiniband_legacy_unicast_packets_received_total{device="mlx4_0",port="2"} 61148
# HELP node_infiniband_legacy_unicast_packets_transmitted_total Number of unicast packets transmitted
# TYPE node_infiniband_legacy_unicast_packets_transmitted_total counter
node_infiniband_legacy_unicast_packets_transmitted_total{device="mlx4_0",port="1"} 61239
node_infiniband_legacy_unicast_packets_transmitted_total{device="mlx4_0",port="2"} 61239
# HELP node_infiniband_link_downed_total Number of times the link failed to recover from an error state and went down
# TYPE node_infiniband_link_downed_total counter
node_infiniband_link_downed_total{device="mlx4_0",port="1"} 0

View file

@ -33,8 +33,9 @@ var (
)
type infinibandCollector struct {
metricDescs map[string]*prometheus.Desc
counters map[string]infinibandMetric
metricDescs map[string]*prometheus.Desc
counters map[string]infinibandMetric
legacyCounters map[string]infinibandMetric
}
type infinibandMetric struct {
@ -62,6 +63,18 @@ func NewInfiniBandCollector() (Collector, error) {
"unicast_packets_transmitted_total": {"unicast_xmit_packets", "Number of unicast packets transmitted (including errors)"},
}
// Deprecated counters for some older versions of InfiniBand drivers.
i.legacyCounters = map[string]infinibandMetric{
"legacy_multicast_packets_received_total": {"port_multicast_rcv_packets", "Number of multicast packets received"},
"legacy_multicast_packets_transmitted_total": {"port_multicast_xmit_packets", "Number of multicast packets transmitted"},
"legacy_data_received_bytes_total": {"port_rcv_data_64", "Number of data octets received on all links"},
"legacy_packets_received_total": {"port_rcv_packets_64", "Number of data packets received on all links"},
"legacy_unicast_packets_received_total": {"port_unicast_rcv_packets", "Number of unicast packets received"},
"legacy_unicast_packets_transmitted_total": {"port_unicast_xmit_packets", "Number of unicast packets transmitted"},
"legacy_data_transmitted_bytes_total": {"port_xmit_data_64", "Number of data octets transmitted on all links"},
"legacy_packets_transmitted_total": {"port_xmit_packets_64", "Number of data packets received on all links"},
}
subsystem := "infiniband"
i.metricDescs = make(map[string]*prometheus.Desc)
@ -74,6 +87,15 @@ func NewInfiniBandCollector() (Collector, error) {
)
}
for metricName, infinibandMetric := range i.legacyCounters {
i.metricDescs[metricName] = prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, metricName),
infinibandMetric.Help,
[]string{"device", "port"},
nil,
)
}
return &i, nil
}
@ -175,6 +197,25 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error {
port,
)
}
// Add metrics for the legacy InfiniBand counters.
for metricName, infinibandMetric := range c.legacyCounters {
if _, err := os.Stat(filepath.Join(portFiles, "counters_ext", infinibandMetric.File)); os.IsNotExist(err) {
continue
}
metric, err := readMetric(filepath.Join(portFiles, "counters_ext"), infinibandMetric.File)
if err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(
c.metricDescs[metricName],
prometheus.CounterValue,
float64(metric),
device,
port,
)
}
}
}