mirror of
https://github.com/prometheus/node_exporter.git
synced 2024-12-31 16:37:31 -08:00
Merge pull request #324 from prometheus/superq/edac_mc
Add collector for Linux EDAC
This commit is contained in:
commit
12f8494a83
|
@ -21,6 +21,7 @@ Name | Description | OS
|
|||
conntrack | Shows conntrack statistics (does nothing if no `/proc/sys/net/netfilter/` present). | Linux
|
||||
cpu | Exposes CPU statistics | Darwin, Dragonfly, FreeBSD
|
||||
diskstats | Exposes disk I/O statistics from `/proc/diskstats`. | Linux
|
||||
edac | Exposes error detection and correction statistics. | Linux
|
||||
entropy | Exposes available entropy. | Linux
|
||||
filefd | Exposes file descriptor statistics from `/proc/sys/fs/file-nr`. | Linux
|
||||
filesystem | Exposes filesystem statistics, such as disk space used. | Darwin, Dragonfly, FreeBSD, Linux, OpenBSD
|
||||
|
|
151
collector/edac_linux.go
Normal file
151
collector/edac_linux.go
Normal file
|
@ -0,0 +1,151 @@
|
|||
// Copyright 2015 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// +build !noedac
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
const (
	// edacSubsystem is the metric subsystem used when building the fully
	// qualified names of all EDAC metrics.
	edacSubsystem = "edac"
)

var (
	// edacMemControllerRE captures the memory controller number from a sysfs
	// path such as ".../devices/system/edac/mc/mc0". At least one digit is
	// required so the captured label value can never be empty.
	edacMemControllerRE = regexp.MustCompile(`.*devices/system/edac/mc/mc([0-9]+)`)
	// edacMemCsrowRE captures the csrow number from a sysfs path such as
	// ".../devices/system/edac/mc/mc0/csrow0". At least one digit is required
	// for both the controller and the csrow component.
	edacMemCsrowRE = regexp.MustCompile(`.*devices/system/edac/mc/mc[0-9]+/csrow([0-9]+)`)
)
|
||||
|
||||
type edacMCMetric struct {
|
||||
metricName string
|
||||
metricType prometheus.ValueType
|
||||
metricHelp string
|
||||
memController string
|
||||
value float64
|
||||
}
|
||||
|
||||
type edacCollector struct {
|
||||
ceCount *prometheus.Desc
|
||||
ueCount *prometheus.Desc
|
||||
csRowCECount *prometheus.Desc
|
||||
csRowUECount *prometheus.Desc
|
||||
}
|
||||
|
||||
func init() {
|
||||
Factories["edac"] = NewEdacCollector
|
||||
}
|
||||
|
||||
// Takes a prometheus registry and returns a new Collector exposing
|
||||
// edac stats.
|
||||
func NewEdacCollector() (Collector, error) {
|
||||
return &edacCollector{
|
||||
ceCount: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, edacSubsystem, "correctable_errors_total"),
|
||||
"Total correctable memory errors.",
|
||||
[]string{"controller"}, nil,
|
||||
),
|
||||
ueCount: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, edacSubsystem, "uncorrectable_errors_total"),
|
||||
"Total uncorrectable memory errors.",
|
||||
[]string{"controller"}, nil,
|
||||
),
|
||||
csRowCECount: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, edacSubsystem, "csrow_correctable_errors_total"),
|
||||
"Total correctable memory errors for this csrow.",
|
||||
[]string{"controller", "csrow"}, nil,
|
||||
),
|
||||
csRowUECount: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, edacSubsystem, "csrow_uncorrectable_errors_total"),
|
||||
"Total uncorrectable memory errors for this csrow.",
|
||||
[]string{"controller", "csrow"}, nil,
|
||||
),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *edacCollector) Update(ch chan<- prometheus.Metric) (err error) {
|
||||
memControllers, err := filepath.Glob(sysFilePath("devices/system/edac/mc/mc[0-9]*"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, controller := range memControllers {
|
||||
controllerMatch := edacMemControllerRE.FindStringSubmatch(controller)
|
||||
if controllerMatch == nil {
|
||||
return fmt.Errorf("controller string didn't match regexp: %s", controller)
|
||||
}
|
||||
controllerNumber := controllerMatch[1]
|
||||
|
||||
value, err := readUintFromFile(path.Join(controller, "ce_count"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("couldn't get ce_count for controller %s: %s", controllerNumber, err)
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.ceCount, prometheus.CounterValue, float64(value), controllerNumber)
|
||||
|
||||
value, err = readUintFromFile(path.Join(controller, "ce_noinfo_count"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("couldn't get ce_noinfo_count for controller %s: %s", controllerNumber, err)
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.csRowCECount, prometheus.CounterValue, float64(value), controllerNumber, "unknown")
|
||||
|
||||
value, err = readUintFromFile(path.Join(controller, "ue_count"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("couldn't get ue_count for controller %s: %s", controllerNumber, err)
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.ueCount, prometheus.CounterValue, float64(value), controllerNumber)
|
||||
|
||||
value, err = readUintFromFile(path.Join(controller, "ue_noinfo_count"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("couldn't get ue_noinfo_count for controller %s: %s", controllerNumber, err)
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.csRowUECount, prometheus.CounterValue, float64(value), controllerNumber, "uknown")
|
||||
|
||||
// For each controller, walk the csrow directories.
|
||||
csrows, err := filepath.Glob(controller + "/csrow[0-9]*")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, csrow := range csrows {
|
||||
csrowMatch := edacMemCsrowRE.FindStringSubmatch(csrow)
|
||||
if csrowMatch == nil {
|
||||
return fmt.Errorf("csrow string didn't match regexp: %s", csrow)
|
||||
}
|
||||
csrowNumber := csrowMatch[1]
|
||||
|
||||
value, err = readUintFromFile(path.Join(csrow, "ce_count"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("couldn't get ce_count for controller/csrow %s/%s: %s", controllerNumber, csrowNumber, err)
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.csRowCECount, prometheus.CounterValue, float64(value), controllerNumber, csrowNumber)
|
||||
|
||||
value, err = readUintFromFile(path.Join(csrow, "ue_count"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("couldn't get ue_count for controller/csrow %s/%s: %s", controllerNumber, csrowNumber, err)
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.csRowUECount, prometheus.CounterValue, float64(value), controllerNumber, csrowNumber)
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
|
@ -401,6 +401,20 @@ node_drbd_remote_pending{device="drbd1"} 12346
|
|||
# HELP node_drbd_remote_unacknowledged Number of requests received by the peer via the network connection, but that have not yet been answered.
|
||||
# TYPE node_drbd_remote_unacknowledged gauge
|
||||
node_drbd_remote_unacknowledged{device="drbd1"} 12347
|
||||
# HELP node_edac_correctable_errors_total Total correctable memory errors.
|
||||
# TYPE node_edac_correctable_errors_total counter
|
||||
node_edac_correctable_errors_total{controller="0"} 1
|
||||
# HELP node_edac_csrow_correctable_errors_total Total correctable memory errors for this csrow.
|
||||
# TYPE node_edac_csrow_correctable_errors_total counter
|
||||
node_edac_csrow_correctable_errors_total{controller="0",csrow="0"} 3
|
||||
node_edac_csrow_correctable_errors_total{controller="0",csrow="unknown"} 2
|
||||
# HELP node_edac_csrow_uncorrectable_errors_total Total uncorrectable memory errors for this csrow.
|
||||
# TYPE node_edac_csrow_uncorrectable_errors_total counter
|
||||
node_edac_csrow_uncorrectable_errors_total{controller="0",csrow="0"} 4
|
||||
node_edac_csrow_uncorrectable_errors_total{controller="0",csrow="uknown"} 6
|
||||
# HELP node_edac_uncorrectable_errors_total Total uncorrectable memory errors.
|
||||
# TYPE node_edac_uncorrectable_errors_total counter
|
||||
node_edac_uncorrectable_errors_total{controller="0"} 5
|
||||
# HELP node_entropy_available_bits Bits of available entropy.
|
||||
# TYPE node_entropy_available_bits gauge
|
||||
node_entropy_available_bits 1337
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
1
|
|
@ -0,0 +1 @@
|
|||
2
|
|
@ -0,0 +1 @@
|
|||
3
|
|
@ -0,0 +1 @@
|
|||
4
|
|
@ -0,0 +1 @@
|
|||
5
|
|
@ -0,0 +1 @@
|
|||
6
|
|
@ -6,6 +6,7 @@ collectors=$(cat << COLLECTORS
|
|||
conntrack
|
||||
diskstats
|
||||
drbd
|
||||
edac
|
||||
entropy
|
||||
filefd
|
||||
hwmon
|
||||
|
|
|
@ -32,7 +32,7 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
defaultCollectors = "conntrack,cpu,diskstats,entropy,filefd,filesystem,hwmon,loadavg,mdadm,meminfo,netdev,netstat,sockstat,stat,textfile,time,uname,vmstat,zfs"
|
||||
defaultCollectors = "conntrack,cpu,diskstats,entropy,edac,filefd,filesystem,hwmon,loadavg,mdadm,meminfo,netdev,netstat,sockstat,stat,textfile,time,uname,vmstat,zfs"
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
Loading…
Reference in a new issue