node_exporter/collector/infiniband_linux.go
Benjamin Drung 27b8c93a5a Use InfiniBandClass from procfs library (#1396)
Parsing the sysfs files for InfiniBand was added to the procfs library
(see https://github.com/prometheus/procfs/pull/164).

Therefore use `InfiniBandClass` from the procfs library instead of
parsing sysfs itself.

If a port counter returns `N/A (no PMA)`, no metric will be returned
(instead of returning 0 for this metric).

Signed-off-by: Benjamin Drung <benjamin.drung@cloud.ionos.com>
2019-09-23 18:18:35 +02:00

137 lines
7.6 KiB
Go

// Copyright 2017-2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build linux
// +build !noinfiniband
package collector
import (
"fmt"
"strconv"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs/sysfs"
)
// infinibandCollector is the Collector implementation for InfiniBand port
// counters; it reads them through the sysfs handle it holds.
type infinibandCollector struct {
// fs is the sysfs handle used to query the InfiniBand class.
fs sysfs.FS
// metricDescs maps a short metric name (without namespace and subsystem
// prefix) to the descriptor used when emitting that metric.
metricDescs map[string]*prometheus.Desc
}
// init registers the InfiniBand collector with the package's collector
// registry under the name "infiniband", passing defaultEnabled as its
// default state and NewInfiniBandCollector as its factory.
func init() {
	const collectorName = "infiniband"
	registerCollector(collectorName, defaultEnabled, NewInfiniBandCollector)
}
// NewInfiniBandCollector returns a new Collector exposing InfiniBand stats.
//
// It opens sysfs at *sysPath and pre-builds one descriptor per supported
// counter. Every descriptor is named <namespace>_infiniband_<metric> and
// carries the variable labels "device" and "port". An error is returned if
// sysfs cannot be opened.
func NewInfiniBandCollector() (Collector, error) {
	var i infinibandCollector
	var err error

	i.fs, err = sysfs.NewFS(*sysPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open sysfs: %v", err)
	}

	// Detailed description for all metrics.
	descriptions := map[string]string{
		"legacy_multicast_packets_received_total":    "Number of multicast packets received",
		"legacy_multicast_packets_transmitted_total": "Number of multicast packets transmitted",
		"legacy_data_received_bytes_total":           "Number of data octets received on all links",
		"legacy_packets_received_total":              "Number of data packets received on all links",
		"legacy_unicast_packets_received_total":      "Number of unicast packets received",
		"legacy_unicast_packets_transmitted_total":   "Number of unicast packets transmitted",
		"legacy_data_transmitted_bytes_total":        "Number of data octets transmitted on all links",
		// Fixed: this help text previously said "received", a copy-paste
		// slip from legacy_packets_received_total.
		"legacy_packets_transmitted_total":         "Number of data packets transmitted on all links",
		"link_downed_total":                        "Number of times the link failed to recover from an error state and went down",
		"link_error_recovery_total":                "Number of times the link successfully recovered from an error state",
		"multicast_packets_received_total":         "Number of multicast packets received (including errors)",
		"multicast_packets_transmitted_total":      "Number of multicast packets transmitted (including errors)",
		"port_constraint_errors_received_total":    "Number of packets received on the switch physical port that are discarded",
		"port_constraint_errors_transmitted_total": "Number of packets not transmitted from the switch physical port",
		"port_data_received_bytes_total":           "Number of data octets received on all links",
		"port_data_transmitted_bytes_total":        "Number of data octets transmitted on all links",
		"port_discards_received_total":             "Number of inbound packets discarded by the port because the port is down or congested",
		"port_discards_transmitted_total":          "Number of outbound packets discarded by the port because the port is down or congested",
		"port_errors_received_total":               "Number of packets containing an error that were received on this port",
		"port_packets_received_total":              "Number of packets received on all VLs by this port (including errors)",
		"port_packets_transmitted_total":           "Number of packets transmitted on all VLs from this port (including errors)",
		"port_transmit_wait_total":                 "Number of ticks during which the port had data to transmit but no data was sent during the entire tick",
		"unicast_packets_received_total":           "Number of unicast packets received (including errors)",
		"unicast_packets_transmitted_total":        "Number of unicast packets transmitted (including errors)",
	}

	// The number of descriptors is known up front, so size the map once.
	i.metricDescs = make(map[string]*prometheus.Desc, len(descriptions))

	for metricName, description := range descriptions {
		i.metricDescs[metricName] = prometheus.NewDesc(
			prometheus.BuildFQName(namespace, "infiniband", metricName),
			description,
			[]string{"device", "port"},
			nil,
		)
	}

	return &i, nil
}
// pushMetric builds a constant metric from the descriptor registered under
// name and sends it on ch. value is converted to float64; deviceName and
// port are passed as the label values, in that order. The metric is created
// with MustNewConstMetric.
func (c *infinibandCollector) pushMetric(ch chan<- prometheus.Metric, name string, value uint64, deviceName string, port string, valueType prometheus.ValueType) {
	desc := c.metricDescs[name]
	sample := prometheus.MustNewConstMetric(desc, valueType, float64(value), deviceName, port)
	ch <- sample
}
// pushCounter emits the named metric as a counter when value is non-nil.
// A nil value emits nothing, so no sample is produced for that counter.
func (c *infinibandCollector) pushCounter(ch chan<- prometheus.Metric, name string, value *uint64, deviceName string, port string) {
	if value == nil {
		return
	}
	c.pushMetric(ch, name, *value, deviceName, port, prometheus.CounterValue)
}
// Update reads the InfiniBand class from sysfs and, for every port of every
// device, emits one counter per available port counter on ch. Counters whose
// value pointer is nil are skipped by pushCounter. An error is returned only
// when the InfiniBand class itself cannot be read.
func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error {
	devices, err := c.fs.InfiniBandClass()
	if err != nil {
		return fmt.Errorf("error obtaining InfiniBand class info: %s", err)
	}

	for _, device := range devices {
		for _, port := range device.Ports {
			portLabel := strconv.FormatUint(uint64(port.Port), 10)
			counters := port.Counters

			// Metric name paired with its source counter; iterated in
			// declaration order.
			samples := []struct {
				metric string
				value  *uint64
			}{
				{"legacy_multicast_packets_received_total", counters.LegacyPortMulticastRcvPackets},
				{"legacy_multicast_packets_transmitted_total", counters.LegacyPortMulticastXmitPackets},
				{"legacy_data_received_bytes_total", counters.LegacyPortRcvData64},
				{"legacy_packets_received_total", counters.LegacyPortRcvPackets64},
				{"legacy_unicast_packets_received_total", counters.LegacyPortUnicastRcvPackets},
				{"legacy_unicast_packets_transmitted_total", counters.LegacyPortUnicastXmitPackets},
				{"legacy_data_transmitted_bytes_total", counters.LegacyPortXmitData64},
				{"legacy_packets_transmitted_total", counters.LegacyPortXmitPackets64},
				{"link_downed_total", counters.LinkDowned},
				{"link_error_recovery_total", counters.LinkErrorRecovery},
				{"multicast_packets_received_total", counters.MulticastRcvPackets},
				{"multicast_packets_transmitted_total", counters.MulticastXmitPackets},
				{"port_constraint_errors_received_total", counters.PortRcvConstraintErrors},
				{"port_constraint_errors_transmitted_total", counters.PortXmitConstraintErrors},
				{"port_data_received_bytes_total", counters.PortRcvData},
				{"port_data_transmitted_bytes_total", counters.PortXmitData},
				{"port_discards_received_total", counters.PortRcvDiscards},
				{"port_discards_transmitted_total", counters.PortXmitDiscards},
				{"port_errors_received_total", counters.PortRcvErrors},
				{"port_packets_received_total", counters.PortRcvPackets},
				{"port_packets_transmitted_total", counters.PortXmitPackets},
				{"port_transmit_wait_total", counters.PortXmitWait},
				{"unicast_packets_received_total", counters.UnicastRcvPackets},
				{"unicast_packets_transmitted_total", counters.UnicastXmitPackets},
			}

			for _, s := range samples {
				c.pushCounter(ch, s.metric, s.value, port.Name, portLabel)
			}
		}
	}

	return nil
}