| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-03 04:35:24 -07:00
										 |  |  | //go:build !noedac
 | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | // +build !noedac
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package collector | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"fmt" | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 	"log/slog" | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 	"path/filepath" | 
					
						
							|  |  |  | 	"regexp" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	"github.com/prometheus/client_golang/prometheus" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const ( | 
					
						
							|  |  |  | 	edacSubsystem = "edac" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var ( | 
					
						
							|  |  |  | 	edacMemControllerRE = regexp.MustCompile(`.*devices/system/edac/mc/mc([0-9]*)`) | 
					
						
							|  |  |  | 	edacMemCsrowRE      = regexp.MustCompile(`.*devices/system/edac/mc/mc[0-9]*/csrow([0-9]*)`) | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type edacCollector struct { | 
					
						
							|  |  |  | 	ceCount      *prometheus.Desc | 
					
						
							|  |  |  | 	ueCount      *prometheus.Desc | 
					
						
							|  |  |  | 	csRowCECount *prometheus.Desc | 
					
						
							|  |  |  | 	csRowUECount *prometheus.Desc | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | 	logger       *slog.Logger | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func init() { | 
					
						
							| 
									
										
										
										
											2017-09-28 06:06:26 -07:00
										 |  |  | 	registerCollector("edac", defaultEnabled, NewEdacCollector) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-28 08:44:53 -08:00
										 |  |  | // NewEdacCollector returns a new Collector exposing edac stats.
 | 
					
						
							| 
									
										
										
										
											2024-09-11 01:51:28 -07:00
										 |  |  | func NewEdacCollector(logger *slog.Logger) (Collector, error) { | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 	return &edacCollector{ | 
					
						
							|  |  |  | 		ceCount: prometheus.NewDesc( | 
					
						
							| 
									
										
										
										
											2017-09-28 06:06:26 -07:00
										 |  |  | 			prometheus.BuildFQName(namespace, edacSubsystem, "correctable_errors_total"), | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 			"Total correctable memory errors.", | 
					
						
							|  |  |  | 			[]string{"controller"}, nil, | 
					
						
							|  |  |  | 		), | 
					
						
							|  |  |  | 		ueCount: prometheus.NewDesc( | 
					
						
							| 
									
										
										
										
											2017-09-28 06:06:26 -07:00
										 |  |  | 			prometheus.BuildFQName(namespace, edacSubsystem, "uncorrectable_errors_total"), | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 			"Total uncorrectable memory errors.", | 
					
						
							|  |  |  | 			[]string{"controller"}, nil, | 
					
						
							|  |  |  | 		), | 
					
						
							|  |  |  | 		csRowCECount: prometheus.NewDesc( | 
					
						
							| 
									
										
										
										
											2017-09-28 06:06:26 -07:00
										 |  |  | 			prometheus.BuildFQName(namespace, edacSubsystem, "csrow_correctable_errors_total"), | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 			"Total correctable memory errors for this csrow.", | 
					
						
							|  |  |  | 			[]string{"controller", "csrow"}, nil, | 
					
						
							|  |  |  | 		), | 
					
						
							|  |  |  | 		csRowUECount: prometheus.NewDesc( | 
					
						
							| 
									
										
										
										
											2017-09-28 06:06:26 -07:00
										 |  |  | 			prometheus.BuildFQName(namespace, edacSubsystem, "csrow_uncorrectable_errors_total"), | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 			"Total uncorrectable memory errors for this csrow.", | 
					
						
							|  |  |  | 			[]string{"controller", "csrow"}, nil, | 
					
						
							|  |  |  | 		), | 
					
						
							| 
									
										
										
										
											2019-12-31 08:19:37 -08:00
										 |  |  | 		logger: logger, | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 	}, nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-28 10:47:20 -08:00
										 |  |  | func (c *edacCollector) Update(ch chan<- prometheus.Metric) error { | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 	memControllers, err := filepath.Glob(sysFilePath("devices/system/edac/mc/mc[0-9]*")) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	for _, controller := range memControllers { | 
					
						
							|  |  |  | 		controllerMatch := edacMemControllerRE.FindStringSubmatch(controller) | 
					
						
							|  |  |  | 		if controllerMatch == nil { | 
					
						
							|  |  |  | 			return fmt.Errorf("controller string didn't match regexp: %s", controller) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		controllerNumber := controllerMatch[1] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-02-05 07:37:27 -08:00
										 |  |  | 		value, err := readUintFromFile(filepath.Join(controller, "ce_count")) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 		if err != nil { | 
					
						
							| 
									
										
										
										
											2020-06-15 13:27:14 -07:00
										 |  |  | 			return fmt.Errorf("couldn't get ce_count for controller %s: %w", controllerNumber, err) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 		} | 
					
						
							|  |  |  | 		ch <- prometheus.MustNewConstMetric( | 
					
						
							|  |  |  | 			c.ceCount, prometheus.CounterValue, float64(value), controllerNumber) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-02-05 07:37:27 -08:00
										 |  |  | 		value, err = readUintFromFile(filepath.Join(controller, "ce_noinfo_count")) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 		if err != nil { | 
					
						
							| 
									
										
										
										
											2020-06-15 13:27:14 -07:00
										 |  |  | 			return fmt.Errorf("couldn't get ce_noinfo_count for controller %s: %w", controllerNumber, err) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 		} | 
					
						
							|  |  |  | 		ch <- prometheus.MustNewConstMetric( | 
					
						
							|  |  |  | 			c.csRowCECount, prometheus.CounterValue, float64(value), controllerNumber, "unknown") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-02-05 07:37:27 -08:00
										 |  |  | 		value, err = readUintFromFile(filepath.Join(controller, "ue_count")) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 		if err != nil { | 
					
						
							| 
									
										
										
										
											2020-06-15 13:27:14 -07:00
										 |  |  | 			return fmt.Errorf("couldn't get ue_count for controller %s: %w", controllerNumber, err) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 		} | 
					
						
							|  |  |  | 		ch <- prometheus.MustNewConstMetric( | 
					
						
							|  |  |  | 			c.ueCount, prometheus.CounterValue, float64(value), controllerNumber) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-02-05 07:37:27 -08:00
										 |  |  | 		value, err = readUintFromFile(filepath.Join(controller, "ue_noinfo_count")) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 		if err != nil { | 
					
						
							| 
									
										
										
										
											2020-06-15 13:27:14 -07:00
										 |  |  | 			return fmt.Errorf("couldn't get ue_noinfo_count for controller %s: %w", controllerNumber, err) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 		} | 
					
						
							|  |  |  | 		ch <- prometheus.MustNewConstMetric( | 
					
						
							| 
									
										
										
										
											2017-04-18 03:45:06 -07:00
										 |  |  | 			c.csRowUECount, prometheus.CounterValue, float64(value), controllerNumber, "unknown") | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		// For each controller, walk the csrow directories.
 | 
					
						
							|  |  |  | 		csrows, err := filepath.Glob(controller + "/csrow[0-9]*") | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return err | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		for _, csrow := range csrows { | 
					
						
							|  |  |  | 			csrowMatch := edacMemCsrowRE.FindStringSubmatch(csrow) | 
					
						
							|  |  |  | 			if csrowMatch == nil { | 
					
						
							|  |  |  | 				return fmt.Errorf("csrow string didn't match regexp: %s", csrow) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			csrowNumber := csrowMatch[1] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-02-05 07:37:27 -08:00
										 |  |  | 			value, err = readUintFromFile(filepath.Join(csrow, "ce_count")) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 			if err != nil { | 
					
						
							| 
									
										
										
										
											2020-06-15 13:27:14 -07:00
										 |  |  | 				return fmt.Errorf("couldn't get ce_count for controller/csrow %s/%s: %w", controllerNumber, csrowNumber, err) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 			} | 
					
						
							|  |  |  | 			ch <- prometheus.MustNewConstMetric( | 
					
						
							|  |  |  | 				c.csRowCECount, prometheus.CounterValue, float64(value), controllerNumber, csrowNumber) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-02-05 07:37:27 -08:00
										 |  |  | 			value, err = readUintFromFile(filepath.Join(csrow, "ue_count")) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 			if err != nil { | 
					
						
							| 
									
										
										
										
											2020-06-15 13:27:14 -07:00
										 |  |  | 				return fmt.Errorf("couldn't get ue_count for controller/csrow %s/%s: %w", controllerNumber, csrowNumber, err) | 
					
						
							| 
									
										
										
										
											2016-08-16 08:10:23 -07:00
										 |  |  | 			} | 
					
						
							|  |  |  | 			ch <- prometheus.MustNewConstMetric( | 
					
						
							|  |  |  | 				c.csRowUECount, prometheus.CounterValue, float64(value), controllerNumber, csrowNumber) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return err | 
					
						
							|  |  |  | } |