mirror of
https://github.com/prometheus/node_exporter.git
synced 2025-03-05 21:00:12 -08:00
update by codereview
Signed-off-by: dongjiang1989 <dongjiang1989@126.com>
This commit is contained in:
parent
4db643e116
commit
f8f478d3a4
|
@ -87,9 +87,9 @@ func NewInfiniBandCollector(logger log.Logger) (Collector, error) {
|
|||
"vl15_dropped_total": "Number of incoming VL15 packets dropped due to resource limitations.",
|
||||
|
||||
// https://enterprise-support.nvidia.com/s/article/understanding-mlx5-linux-counters-and-status-parameters
|
||||
"duplicate_request_total": "The number of received packets. A duplicate request is a request that had been previously executed.",
|
||||
"duplicate_requests_total": "The number of received packets. A duplicate request is a request that had been previously executed.",
|
||||
"implied_nak_seq_errors_total": "The number of time the requested decided an ACK. with a PSN larger than the expected PSN for an RDMA read or response.",
|
||||
"lifespan_millisecond": "The maximum period in ms which defines the aging of the counter reads. Two consecutive reads within this period might return the same values.",
|
||||
"lifespan_seconds": "The maximum period in ms which defines the aging of the counter reads. Two consecutive reads within this period might return the same values.",
|
||||
"local_ack_timeout_errors_total": "The number of times QP's ack timer expired for RC, XRC, DCT QPs at the sender side. The QP retry limit was not exceed, therefore it is still recoverable error.",
|
||||
"np_cnp_sent_total": "The number of CNP packets sent by the Notification Point when it noticed congestion experienced in the RoCEv2 IP header (ECN bits). The counters was added in MLNX_OFED 4.1",
|
||||
"np_ecn_marked_roce_packets_total": "The number of RoCEv2 packets received by the notification point which were marked for experiencing the congestion (ECN bits where '11' on the ingress RoCE traffic) . The counters was added in MLNX_OFED 4.1",
|
||||
|
@ -105,11 +105,11 @@ func NewInfiniBandCollector(logger log.Logger) (Collector, error) {
|
|||
"resp_local_length_errors_total": "The number of times responder detected local length errors. The counters was added in MLNX_OFED 4.1",
|
||||
"resp_remote_access_errors_total": "The number of times responder detected remote access errors. The counters was added in MLNX_OFED 4.1",
|
||||
"rnr_nak_retry_errors_total": "The number of received RNR NAK packets. The QP retry limit was not exceeded.",
|
||||
"roce_adp_retrans_total": "The number of adaptive retransmissions for RoCE traffic. The counter was added in MLNX_OFED rev 5.0-1.0.0.0 and kernel v5.6.0",
|
||||
"roce_adp_retrans_to_total": "The number of times RoCE traffic reached timeout due to adaptive retransmission. The counter was added in MLNX_OFED rev 5.0-1.0.0.0 and kernel v5.6.0",
|
||||
"roce_slow_restart_total": "The number of times RoCE slow restart was used. The counter was added in MLNX_OFED rev 5.0-1.0.0.0 and kernel v5.6.0",
|
||||
"roce_adp_retransmits_total": "The number of adaptive retransmissions for RoCE traffic. The counter was added in MLNX_OFED rev 5.0-1.0.0.0 and kernel v5.6.0",
|
||||
"roce_adp_retransmits_timeout_total": "The number of times RoCE traffic reached timeout due to adaptive retransmission. The counter was added in MLNX_OFED rev 5.0-1.0.0.0 and kernel v5.6.0",
|
||||
"roce_slow_restart_used_total": "The number of times RoCE slow restart was used. The counter was added in MLNX_OFED rev 5.0-1.0.0.0 and kernel v5.6.0",
|
||||
"roce_slow_restart_cnps_total": "The number of times RoCE slow restart generated CNP packets. The counter was added in MLNX_OFED rev 5.0-1.0.0.0 and kernel v5.6.0",
|
||||
"roce_slow_restart_trans_total": "The number of times RoCE slow restart changed state to slow restart. The counter was added in MLNX_OFED rev 5.0-1.0.0.0 and kernel v5.6.0",
|
||||
"roce_slow_restart_total": "The number of times RoCE slow restart changed state to slow restart. The counter was added in MLNX_OFED rev 5.0-1.0.0.0 and kernel v5.6.0",
|
||||
"rp_cnp_handled_total": "The number of CNP packets handled by the Reaction Point HCA to throttle the transmission rate. The counters was added in MLNX_OFED 4.1",
|
||||
"rp_cnp_ignored_total": "The number of CNP packets received and ignored by the Reaction Point HCA. This counter should not raise if RoCE Congestion Control was enabled in the network. If this counter raise, verify that ECN was enabled on the adapter.",
|
||||
"rx_atomic_requests_total": "The number of received ATOMIC request for the associated QPs.",
|
||||
|
@ -204,10 +204,10 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
|
||||
// port.HwCounters
|
||||
if port.HwCounters.Lifespan != nil {
|
||||
c.pushMetric(ch, "lifespan_millisecond", *(port.HwCounters.Lifespan), port.Name, portStr, prometheus.GaugeValue)
|
||||
c.pushMetric(ch, "lifespan_seconds", *(port.HwCounters.Lifespan)/1000, port.Name, portStr, prometheus.GaugeValue)
|
||||
}
|
||||
|
||||
c.pushCounter(ch, "duplicate_request_total", port.HwCounters.DuplicateRequest, port.Name, portStr)
|
||||
c.pushCounter(ch, "duplicate_requests_total", port.HwCounters.DuplicateRequest, port.Name, portStr)
|
||||
c.pushCounter(ch, "implied_nak_seq_errors_total", port.HwCounters.ImpliedNakSeqErr, port.Name, portStr)
|
||||
c.pushCounter(ch, "local_ack_timeout_errors_total", port.HwCounters.LocalAckTimeoutErr, port.Name, portStr)
|
||||
c.pushCounter(ch, "np_cnp_sent_total", port.HwCounters.NpCnpSent, port.Name, portStr)
|
||||
|
@ -224,11 +224,11 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error {
|
|||
c.pushCounter(ch, "resp_local_length_errors_total", port.HwCounters.RespLocalLengthError, port.Name, portStr)
|
||||
c.pushCounter(ch, "resp_remote_access_errors_total", port.HwCounters.RespRemoteAccessErrors, port.Name, portStr)
|
||||
c.pushCounter(ch, "rnr_nak_retry_errors_total", port.HwCounters.RnrNakRetryErr, port.Name, portStr)
|
||||
c.pushCounter(ch, "roce_adp_retrans_total", port.HwCounters.RoceAdpRetrans, port.Name, portStr)
|
||||
c.pushCounter(ch, "roce_adp_retrans_to_total", port.HwCounters.RoceAdpRetransTo, port.Name, portStr)
|
||||
c.pushCounter(ch, "roce_slow_restart_total", port.HwCounters.RoceSlowRestart, port.Name, portStr)
|
||||
c.pushCounter(ch, "roce_adp_retransmits_total", port.HwCounters.RoceAdpRetrans, port.Name, portStr)
|
||||
c.pushCounter(ch, "roce_adp_retransmits_timeout_total", port.HwCounters.RoceAdpRetransTo, port.Name, portStr)
|
||||
c.pushCounter(ch, "roce_slow_restart_used_total", port.HwCounters.RoceSlowRestart, port.Name, portStr)
|
||||
c.pushCounter(ch, "roce_slow_restart_cnps_total", port.HwCounters.RoceSlowRestartCnps, port.Name, portStr)
|
||||
c.pushCounter(ch, "roce_slow_restart_trans_total", port.HwCounters.RoceSlowRestartTrans, port.Name, portStr)
|
||||
c.pushCounter(ch, "roce_slow_restart_total", port.HwCounters.RoceSlowRestartTrans, port.Name, portStr)
|
||||
c.pushCounter(ch, "rp_cnp_handled_total", port.HwCounters.RpCnpHandled, port.Name, portStr)
|
||||
c.pushCounter(ch, "rp_cnp_ignored_total", port.HwCounters.RpCnpIgnored, port.Name, portStr)
|
||||
c.pushCounter(ch, "rx_atomic_requests_total", port.HwCounters.RxAtomicRequests, port.Name, portStr)
|
||||
|
|
Loading…
Reference in a new issue