mirror of
				https://github.com/prometheus/node_exporter.git
				synced 2025-08-20 18:33:52 -07:00 
			
		
		
		
	Merge 0f1f831200 into be19d537cd
				
					
				
			This commit is contained in:
		
						commit
						5b43cda46d
					
				
							
								
								
									
										252
									
								
								collector/aer_linux.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										252
									
								
								collector/aer_linux.go
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,252 @@
 | 
				
			||||||
 | 
					// Copyright 2024 The Prometheus Authors
 | 
				
			||||||
 | 
					// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					// you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					// You may obtain a copy of the License at
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					// See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					// limitations under the License.
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//go:build !nonetclass && linux
 | 
				
			||||||
 | 
					// +build !nonetclass,linux
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package collector
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import (
 | 
				
			||||||
 | 
						"errors"
 | 
				
			||||||
 | 
						"fmt"
 | 
				
			||||||
 | 
						"log/slog"
 | 
				
			||||||
 | 
						"os"
 | 
				
			||||||
 | 
						"regexp"
 | 
				
			||||||
 | 
						"strconv"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						"github.com/alecthomas/kingpin/v2"
 | 
				
			||||||
 | 
						"github.com/prometheus/client_golang/prometheus"
 | 
				
			||||||
 | 
						"github.com/prometheus/procfs/sysfs"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					var (
 | 
				
			||||||
 | 
						aerIgnoredDevices   = kingpin.Flag("collector.aer.ignored-devices", "Regexp of aer devices to ignore for aer collector.").Default("^$").String()
 | 
				
			||||||
 | 
						aerCorrectableRxErr = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "correctable_rx_err"),
 | 
				
			||||||
 | 
							"Count of correctable receiver errors",
 | 
				
			||||||
 | 
							[]string{"interface"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerCorrectableBadTLP = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "correctable_bad_tlp"),
 | 
				
			||||||
 | 
							"Count of correctable bad TLPs",
 | 
				
			||||||
 | 
							[]string{"interface"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerCorrectableBadDLLP = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "correctable_bad_dllp"),
 | 
				
			||||||
 | 
							"Count of correctable bad DLLPs",
 | 
				
			||||||
 | 
							[]string{"interface"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerCorrectableRollover = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "correctable_rollover"),
 | 
				
			||||||
 | 
							"Count of correctable rollovers",
 | 
				
			||||||
 | 
							[]string{"interface"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerCorrectableTimeout = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "correctable_timeout"),
 | 
				
			||||||
 | 
							"Count of correctable replay timer timeouts",
 | 
				
			||||||
 | 
							[]string{"interface"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerCorrectableNonFatalErr = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "correctable_non_fatal_err"),
 | 
				
			||||||
 | 
							"Count of correctable advisory non-fatal errors",
 | 
				
			||||||
 | 
							[]string{"interface"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerCorrectableCorrIntErr = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "correctable_corr_int_err"),
 | 
				
			||||||
 | 
							"Count of correctable corrected internal errors",
 | 
				
			||||||
 | 
							[]string{"interface"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerCorrectableHeaderOF = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "correctable_header_of"),
 | 
				
			||||||
 | 
							"Count of correctable header log Overflows",
 | 
				
			||||||
 | 
							[]string{"interface"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableUndefined = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_undefined"),
 | 
				
			||||||
 | 
							"Count of uncorrectable undefined errors",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableDLP = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_dlp"),
 | 
				
			||||||
 | 
							"Count of uncorrectable data link protocol errors",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableSDES = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_sdes"),
 | 
				
			||||||
 | 
							"Count of uncorrectable surprise down errors",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableTLP = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_tlp"),
 | 
				
			||||||
 | 
							"Count of uncorrectable poisoned TLPs",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableFCP = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_fcp"),
 | 
				
			||||||
 | 
							"Count of uncorrectable flow control protocol errors",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableCmpltTO = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_cmplt_to"),
 | 
				
			||||||
 | 
							"Count of uncorrectable completion timeouts",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableCmpltAbrt = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_cmplt_abrt"),
 | 
				
			||||||
 | 
							"Count of uncorrectable completer aborts",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableUnxCmplt = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_unx_cmplt"),
 | 
				
			||||||
 | 
							"Count of uncorrectable unexpected completion errors",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableRxOF = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_rx_of"),
 | 
				
			||||||
 | 
							"Count of uncorrectable receiver overflows",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableMalfTLP = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_malf_tlp"),
 | 
				
			||||||
 | 
							"Count of uncorrectable malformed TLPs",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableECRC = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_ecrc"),
 | 
				
			||||||
 | 
							"Count of uncorrectable ECRCs",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableUnsupReq = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_unsup_req"),
 | 
				
			||||||
 | 
							"Count of uncorrectable unsupported requests",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableACSViol = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_acs_viol"),
 | 
				
			||||||
 | 
							"Count of uncorrectable ACS violations",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableUncorrIntErr = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_uncorr_int_err"),
 | 
				
			||||||
 | 
							"Count of uncorrectable uncorrectable internal errors",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableBlockedTLP = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_blocked_tlp"),
 | 
				
			||||||
 | 
							"Count of uncorrectable MC blocked TLPs",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableAtomicOpBlocked = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_atomic_op_blocked"),
 | 
				
			||||||
 | 
							"Count of uncorrectable AtomicOp egress blocked errors",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectableTLPBlockedErr = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_tlp_blocked_err"),
 | 
				
			||||||
 | 
							"Count of uncorrectable TLP prefix blocked errors",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
						aerUncorrectablePoisonTLPBlocked = prometheus.NewDesc(prometheus.BuildFQName(namespace, "aer", "uncorrectable_poison_tlp_blocked"),
 | 
				
			||||||
 | 
							"Count of uncorrectable poison TLP prefix blocked errors",
 | 
				
			||||||
 | 
							[]string{"interface", "fatal"}, nil,
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					type aerCollector struct {
 | 
				
			||||||
 | 
						fs                    sysfs.FS
 | 
				
			||||||
 | 
						ignoredDevicesPattern *regexp.Regexp
 | 
				
			||||||
 | 
						logger                *slog.Logger
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func init() {
 | 
				
			||||||
 | 
						registerCollector("aer", defaultDisabled, NewAerCollector)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// NewAerCollector returns a new Collector exposing aer stats.
 | 
				
			||||||
 | 
					func NewAerCollector(logger *slog.Logger) (Collector, error) {
 | 
				
			||||||
 | 
						return makeAerCollector(logger)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func makeAerCollector(logger *slog.Logger) (*aerCollector, error) {
 | 
				
			||||||
 | 
						fs, err := sysfs.NewFS(*sysPath)
 | 
				
			||||||
 | 
						if err != nil {
 | 
				
			||||||
 | 
							return nil, fmt.Errorf("failed to open sysfs: %w", err)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if *aerIgnoredDevices != "" {
 | 
				
			||||||
 | 
							logger.Info("Parsed flag --collector.aer.ignored-devices", "flag", *aerIgnoredDevices)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						pattern := regexp.MustCompile(*aerIgnoredDevices)
 | 
				
			||||||
 | 
						return &aerCollector{
 | 
				
			||||||
 | 
							fs:                    fs,
 | 
				
			||||||
 | 
							ignoredDevicesPattern: pattern,
 | 
				
			||||||
 | 
							logger:                logger,
 | 
				
			||||||
 | 
						}, nil
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func (c *aerCollector) Update(ch chan<- prometheus.Metric) error {
 | 
				
			||||||
 | 
						counters, err := c.fs.AerCounters()
 | 
				
			||||||
 | 
						if err != nil {
 | 
				
			||||||
 | 
							if errors.Is(err, os.ErrNotExist) || errors.Is(err, os.ErrPermission) {
 | 
				
			||||||
 | 
								c.logger.Debug("Could not read netclass file", "err", err)
 | 
				
			||||||
 | 
								return ErrNoData
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							return fmt.Errorf("could not get net class info: %w", err)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for deviceName, deviceCounters := range counters {
 | 
				
			||||||
 | 
							if c.ignoredDevicesPattern.MatchString(deviceName) {
 | 
				
			||||||
 | 
								continue
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							c.updateCorrectableCntrs(ch, deviceName, deviceCounters.Correctable)
 | 
				
			||||||
 | 
							c.updateUncorrectableCntrs(ch, deviceName, deviceCounters.Fatal, true)
 | 
				
			||||||
 | 
							c.updateUncorrectableCntrs(ch, deviceName, deviceCounters.NonFatal, false)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return nil
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func (c *aerCollector) updateCorrectableCntrs(ch chan<- prometheus.Metric, deviceName string, counters sysfs.CorrectableAerCounters) {
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerCorrectableRxErr, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.RxErr), deviceName)
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerCorrectableBadTLP, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.BadTLP), deviceName)
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerCorrectableBadDLLP, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.BadDLLP), deviceName)
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerCorrectableRollover, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.Rollover), deviceName)
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerCorrectableTimeout, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.Timeout), deviceName)
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerCorrectableNonFatalErr, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.NonFatalErr), deviceName)
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerCorrectableCorrIntErr, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.CorrIntErr), deviceName)
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerCorrectableHeaderOF, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.HeaderOF), deviceName)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func (c *aerCollector) updateUncorrectableCntrs(ch chan<- prometheus.Metric, deviceName string, counters sysfs.UncorrectableAerCounters, fatal bool) {
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableUndefined, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.Undefined), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableDLP, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.DLP), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableSDES, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.SDES), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableTLP, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.TLP), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableFCP, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.FCP), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableCmpltTO, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.CmpltTO), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableCmpltAbrt, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.CmpltAbrt), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableUnxCmplt, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.UnxCmplt), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableRxOF, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.RxOF), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableMalfTLP, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.MalfTLP), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableECRC, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.ECRC), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableUnsupReq, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.UnsupReq), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableACSViol, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.ACSViol), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableUncorrIntErr, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.UncorrIntErr), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableBlockedTLP, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.BlockedTLP), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableAtomicOpBlocked, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.AtomicOpBlocked), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectableTLPBlockedErr, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.TLPBlockedErr), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
						ch <- prometheus.MustNewConstMetric(aerUncorrectablePoisonTLPBlocked, prometheus.CounterValue,
 | 
				
			||||||
 | 
							float64(counters.PoisonTLPBlocked), deviceName, strconv.FormatBool(fatal))
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Loading…
	
		Reference in a new issue