diff --git a/collector/attributes.go b/collector/attributes.go index a42bc6e9..f5fa71cf 100644 --- a/collector/attributes.go +++ b/collector/attributes.go @@ -8,12 +8,11 @@ import ( ) var ( - attributes = prometheus.NewGauge() + attributes *prometheus.GaugeVec ) type attributesCollector struct { - registry prometheus.Registry - config Config + config Config } func init() { @@ -22,22 +21,33 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // labels from the config. -func NewAttributesCollector(config Config, registry prometheus.Registry) (Collector, error) { +func NewAttributesCollector(config Config) (Collector, error) { c := attributesCollector{ - config: config, - registry: registry, + config: config, } - registry.Register( - "node_attributes", - "node_exporter attributes", - prometheus.NilLabels, - attributes, + labelNames := []string{} + for l := range c.config.Attributes { + labelNames = append(labelNames, l) + } + gv := prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "attributes", + Help: "The node_exporter attributes.", + }, + labelNames, ) + collector, err := prometheus.RegisterOrGet(gv) + if err != nil { + return nil, err + } + attributes = collector.(*prometheus.GaugeVec) return &c, nil } func (c *attributesCollector) Update() (updates int, err error) { glog.V(1).Info("Set node_attributes{%v}: 1", c.config.Attributes) - attributes.Set(c.config.Attributes, 1) + attributes.Reset() + attributes.With(c.config.Attributes).Set(1) return updates, err } diff --git a/collector/collector.go b/collector/collector.go index e713cc44..47deb2e9 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -1,11 +1,9 @@ // Exporter is a prometheus exporter using multiple Factories to collect and export system metrics. package collector -import ( - "github.com/prometheus/client_golang/prometheus" -) +const Namespace = "node" -var Factories = make(map[string]func(Config, prometheus.Registry) (Collector, error)) +var Factories = make(map[string]func(Config) (Collector, error)) // Interface a collector has to implement. type Collector interface { @@ -13,6 +11,12 @@ type Collector interface { Update() (n int, err error) } +// TODO: Instead of periodically call Update, a Collector could be implemented +// as a real prometheus.Collector that only gathers metrics when +// scraped. (However, for metric gathering that takes very long, it might +// actually be better to do them proactively before scraping to minimize scrape +// time.) + type Config struct { Attributes map[string]string `json:"attributes"` } diff --git a/collector/diskstats.go b/collector/diskstats.go index fe7bd7d0..a5a18c3f 100644 --- a/collector/diskstats.go +++ b/collector/diskstats.go @@ -18,35 +18,119 @@ import ( const ( procDiskStats = "/proc/diskstats" + diskSubsystem = "disk" ) -type diskStat struct { - name string - metric prometheus.Metric - documentation string -} - var ( ignoredDevices = flag.String("diskstatsIgnoredDevices", "^(ram|loop|(h|s|xv)d[a-z])\\d+$", "Regexp of devices to ignore for diskstats.") + diskLabelNames = []string{"device"} + // Docs from https://www.kernel.org/doc/Documentation/iostats.txt - diskStatsMetrics = []diskStat{ - {"reads_completed", prometheus.NewCounter(), "The total number of reads completed successfully."}, - {"reads_merged", prometheus.NewCounter(), "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt"}, - {"sectors_read", prometheus.NewCounter(), "The total number of sectors read successfully."}, - {"read_time_ms", prometheus.NewCounter(), "the total number of milliseconds spent by all reads."}, - {"writes_completed", prometheus.NewCounter(), "The total number of writes completed successfully."}, - {"writes_merged", prometheus.NewCounter(), "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt"}, - {"sectors_written", prometheus.NewCounter(), "The total number of sectors written successfully."}, - {"write_time_ms", prometheus.NewCounter(), "This is the total number of milliseconds spent by all writes."}, - {"io_now", prometheus.NewGauge(), "The number of I/Os currently in progress."}, - {"io_time_ms", prometheus.NewCounter(), "Milliseconds spent doing I/Os."}, - {"io_time_weighted", prometheus.NewCounter(), "The weighted # of milliseconds spent doing I/Os. See https://www.kernel.org/doc/Documentation/iostats.txt"}, + diskStatsMetrics = []prometheus.Collector{ + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "reads_completed", + Help: "The total number of reads completed successfully.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "reads_merged", + Help: "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "sectors_read", + Help: "The total number of sectors read successfully.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "read_time_ms", + Help: "The total number of milliseconds spent by all reads.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "writes_completed", + Help: "The total number of writes completed successfully.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "writes_merged", + Help: "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "sectors_written", + Help: "The total number of sectors written successfully.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "write_time_ms", + Help: "This is the total number of milliseconds spent by all writes.", + }, + diskLabelNames, + ), + prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "io_now", + Help: "The number of I/Os currently in progress.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "io_time_ms", + Help: "Milliseconds spent doing I/Os.", + }, + diskLabelNames, + ), + prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Subsystem: diskSubsystem, + Name: "io_time_weighted", + Help: "The weighted # of milliseconds spent doing I/Os. See https://www.kernel.org/doc/Documentation/iostats.txt.", + }, + diskLabelNames, + ), } ) type diskstatsCollector struct { - registry prometheus.Registry config Config ignoredDevicesPattern *regexp.Regexp } @@ -57,20 +141,16 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // disk device stats. -func NewDiskstatsCollector(config Config, registry prometheus.Registry) (Collector, error) { +func NewDiskstatsCollector(config Config) (Collector, error) { c := diskstatsCollector{ config: config, - registry: registry, ignoredDevicesPattern: regexp.MustCompile(*ignoredDevices), } - for _, v := range diskStatsMetrics { - registry.Register( - "node_disk_"+v.name, - v.documentation, - prometheus.NilLabels, - v.metric, - ) + for _, c := range diskStatsMetrics { + if _, err := prometheus.RegisterOrGet(c); err != nil { + return nil, err + } } return &c, nil } @@ -91,13 +171,12 @@ func (c *diskstatsCollector) Update() (updates int, err error) { if err != nil { return updates, fmt.Errorf("Invalid value %s in diskstats: %s", value, err) } - labels := map[string]string{"device": dev} - counter, ok := diskStatsMetrics[k].metric.(prometheus.Counter) + counter, ok := diskStatsMetrics[k].(*prometheus.CounterVec) if ok { - counter.Set(labels, v) + counter.WithLabelValues(dev).Set(v) } else { - var gauge = diskStatsMetrics[k].metric.(prometheus.Gauge) - gauge.Set(labels, v) + var gauge = diskStatsMetrics[k].(*prometheus.GaugeVec) + gauge.WithLabelValues(dev).Set(v) } } } diff --git a/collector/filesystem.go b/collector/filesystem.go index 08e729ae..6bb43bb0 100644 --- a/collector/filesystem.go +++ b/collector/filesystem.go @@ -11,26 +11,68 @@ import ( "strings" "syscall" - "github.com/golang/glog" + "github.com/golang/glog" "github.com/prometheus/client_golang/prometheus" ) const ( - procMounts = "/proc/mounts" + procMounts = "/proc/mounts" + filesystemSubsystem = "filesystem" ) var ( - fsSizeMetric = prometheus.NewGauge() - fsFreeMetric = prometheus.NewGauge() - fsAvailMetric = prometheus.NewGauge() - fsFilesMetric = prometheus.NewGauge() - fsFilesFreeMetric = prometheus.NewGauge() + filesystemLabelNames = []string{"filesystem"} + + fsSizeMetric = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: filesystemSubsystem, + Name: "size", + Help: "Filesystem size in bytes.", + }, + filesystemLabelNames, + ) + fsFreeMetric = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: filesystemSubsystem, + Name: "free", + Help: "Filesystem free space in bytes.", + }, + filesystemLabelNames, + ) + fsAvailMetric = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: filesystemSubsystem, + Name: "avail", + Help: "Filesystem space available to non-root users in bytes.", + }, + filesystemLabelNames, + ) + fsFilesMetric = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: filesystemSubsystem, + Name: "files", + Help: "Filesystem total file nodes.", + }, + filesystemLabelNames, + ) + fsFilesFreeMetric = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: filesystemSubsystem, + Name: "files_free", + Help: "Filesystem total free file nodes.", + }, + filesystemLabelNames, + ) ignoredMountPoints = flag.String("filesystemIgnoredMountPoints", "^/(sys|proc|dev)($|/)", "Regexp of mount points to ignore for filesystem collector.") ) type filesystemCollector struct { - registry prometheus.Registry config Config ignoredMountPointsPattern *regexp.Regexp } @@ -41,42 +83,26 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // network device filesystems. -func NewFilesystemCollector(config Config, registry prometheus.Registry) (Collector, error) { +func NewFilesystemCollector(config Config) (Collector, error) { c := filesystemCollector{ - config: config, - registry: registry, + config: config, ignoredMountPointsPattern: regexp.MustCompile(*ignoredMountPoints), } - registry.Register( - "node_filesystem_size", - "Filesystem size in bytes.", - prometheus.NilLabels, - fsSizeMetric, - ) - registry.Register( - "node_filesystem_free", - "Filesystem free space in bytes.", - prometheus.NilLabels, - fsFreeMetric, - ) - registry.Register( - "node_filesystem_avail", - "Filesystem space available to non-root users in bytes.", - prometheus.NilLabels, - fsAvailMetric, - ) - registry.Register( - "node_filesystem_files", - "Filesystem total file nodes.", - prometheus.NilLabels, - fsFilesMetric, - ) - registry.Register( - "node_filesystem_files_free", - "Filesystem total free file nodes.", - prometheus.NilLabels, - fsFilesFreeMetric, - ) + if _, err := prometheus.RegisterOrGet(fsSizeMetric); err != nil { + return nil, err + } + if _, err := prometheus.RegisterOrGet(fsFreeMetric); err != nil { + return nil, err + } + if _, err := prometheus.RegisterOrGet(fsAvailMetric); err != nil { + return nil, err + } + if _, err := prometheus.RegisterOrGet(fsFilesMetric); err != nil { + return nil, err + } + if _, err := prometheus.RegisterOrGet(fsFilesFreeMetric); err != nil { + return nil, err + } return &c, nil } @@ -96,11 +122,11 @@ func (c *filesystemCollector) Update() (updates int, err error) { if err != nil { return updates, fmt.Errorf("Statfs on %s returned %s", mp, err) } - fsSizeMetric.Set(map[string]string{"filesystem": mp}, float64(buf.Blocks)*float64(buf.Bsize)) - fsFreeMetric.Set(map[string]string{"filesystem": mp}, float64(buf.Bfree)*float64(buf.Bsize)) - fsAvailMetric.Set(map[string]string{"filesystem": mp}, float64(buf.Bavail)*float64(buf.Bsize)) - fsFilesMetric.Set(map[string]string{"filesystem": mp}, float64(buf.Files)) - fsFilesFreeMetric.Set(map[string]string{"filesystem": mp}, float64(buf.Ffree)) + fsSizeMetric.WithLabelValues(mp).Set(float64(buf.Blocks) * float64(buf.Bsize)) + fsFreeMetric.WithLabelValues(mp).Set(float64(buf.Bfree) * float64(buf.Bsize)) + fsAvailMetric.WithLabelValues(mp).Set(float64(buf.Bavail) * float64(buf.Bsize)) + fsFilesMetric.WithLabelValues(mp).Set(float64(buf.Files)) + fsFilesFreeMetric.WithLabelValues(mp).Set(float64(buf.Ffree)) updates++ } return updates, err diff --git a/collector/gmond_collector.go b/collector/gmond_collector.go index 43cfd09d..f49bbdea 100644 --- a/collector/gmond_collector.go +++ b/collector/gmond_collector.go @@ -24,9 +24,8 @@ const ( ) type gmondCollector struct { - Metrics map[string]prometheus.Gauge - config Config - registry prometheus.Registry + Metrics map[string]*prometheus.GaugeVec + config Config } func init() { @@ -36,17 +35,16 @@ func init() { var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) // Takes a config struct and prometheus registry and returns a new Collector scraping ganglia. -func NewGmondCollector(config Config, registry prometheus.Registry) (Collector, error) { +func NewGmondCollector(config Config) (Collector, error) { c := gmondCollector{ - config: config, - Metrics: make(map[string]prometheus.Gauge), - registry: registry, + config: config, + Metrics: map[string]*prometheus.GaugeVec{}, } return &c, nil } -func (c *gmondCollector) setMetric(name string, labels map[string]string, metric ganglia.Metric) { +func (c *gmondCollector) setMetric(name, cluster string, metric ganglia.Metric) { if _, ok := c.Metrics[name]; !ok { var desc string var title string @@ -62,12 +60,18 @@ func (c *gmondCollector) setMetric(name string, labels map[string]string, metric } } glog.V(1).Infof("Register %s: %s", name, desc) - gauge := prometheus.NewGauge() - c.Metrics[name] = gauge - c.registry.Register(name, desc, prometheus.NilLabels, gauge) // one gauge per metric! + gv := prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: gangliaMetricsPrefix, + Name: name, + Help: desc, + }, + []string{"cluster"}, + ) + c.Metrics[name] = prometheus.MustRegisterOrGet(gv).(*prometheus.GaugeVec) } - glog.V(1).Infof("Set %s{%s}: %f", name, labels, metric.Value) - c.Metrics[name].Set(labels, metric.Value) + glog.V(1).Infof("Set %s{cluster=%q}: %f", name, cluster, metric.Value) + c.Metrics[name].WithLabelValues(cluster).Set(metric.Value) } func (c *gmondCollector) Update() (updates int, err error) { @@ -91,12 +95,9 @@ func (c *gmondCollector) Update() (updates int, err error) { for _, host := range cluster.Hosts { for _, metric := range host.Metrics { - name := gangliaMetricsPrefix + illegalCharsRE.ReplaceAllString(metric.Name, "_") + name := illegalCharsRE.ReplaceAllString(metric.Name, "_") - var labels = map[string]string{ - "cluster": cluster.Name, - } - c.setMetric(name, labels, metric) + c.setMetric(name, cluster.Name, metric) updates++ } } diff --git a/collector/interrupts.go b/collector/interrupts.go index a13e94c6..507047af 100644 --- a/collector/interrupts.go +++ b/collector/interrupts.go @@ -18,12 +18,18 @@ const ( ) var ( - interruptsMetric = prometheus.NewCounter() + interruptsMetric = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Name: "interrupts", + Help: "Interrupt details from /proc/interrupts.", + }, + []string{"CPU", "type", "info", "devices"}, + ) ) type interruptsCollector struct { - registry prometheus.Registry - config Config + config Config } func init() { @@ -32,17 +38,13 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // interrupts stats -func NewInterruptsCollector(config Config, registry prometheus.Registry) (Collector, error) { +func NewInterruptsCollector(config Config) (Collector, error) { c := interruptsCollector{ - config: config, - registry: registry, + config: config, + } + if _, err := prometheus.RegisterOrGet(interruptsMetric); err != nil { + return nil, err } - registry.Register( - "node_interrupts", - "Interrupt details from /proc/interrupts", - prometheus.NilLabels, - interruptsMetric, - ) return &c, nil } @@ -58,13 +60,13 @@ func (c *interruptsCollector) Update() (updates int, err error) { if err != nil { return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err) } - labels := map[string]string{ + labels := prometheus.Labels{ "CPU": strconv.Itoa(cpuNo), "type": name, "info": interrupt.info, "devices": interrupt.devices, } - interruptsMetric.Set(labels, fv) + interruptsMetric.With(labels).Set(fv) } } return updates, err diff --git a/collector/lastlogin.go b/collector/lastlogin.go index bfd37cc7..36c43b94 100644 --- a/collector/lastlogin.go +++ b/collector/lastlogin.go @@ -14,13 +14,19 @@ import ( "github.com/prometheus/client_golang/prometheus" ) +const lastLoginSubsystem = "last_login" + var ( - lastSeen = prometheus.NewGauge() + lastSeen = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: lastLoginSubsystem, + Name: "time", + Help: "The time of the last login.", + }) ) type lastLoginCollector struct { - registry prometheus.Registry - config Config + config Config } func init() { @@ -29,17 +35,13 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // load, seconds since last login and a list of tags as specified by config. -func NewLastLoginCollector(config Config, registry prometheus.Registry) (Collector, error) { +func NewLastLoginCollector(config Config) (Collector, error) { c := lastLoginCollector{ - config: config, - registry: registry, + config: config, + } + if _, err := prometheus.RegisterOrGet(lastSeen); err != nil { + return nil, err } - registry.Register( - "node_last_login_time", - "The time of the last login.", - prometheus.NilLabels, - lastSeen, - ) return &c, nil } @@ -50,7 +52,7 @@ func (c *lastLoginCollector) Update() (updates int, err error) { } updates++ glog.V(1).Infof("Set node_last_login_time: %f", last) - lastSeen.Set(nil, last) + lastSeen.Set(last) return updates, err } diff --git a/collector/loadavg.go b/collector/loadavg.go index a6104df2..143cfe2f 100644 --- a/collector/loadavg.go +++ b/collector/loadavg.go @@ -17,12 +17,15 @@ const ( ) var ( - load1 = prometheus.NewGauge() + load1 = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "load1", + Help: "1m load average.", + }) ) type loadavgCollector struct { - registry prometheus.Registry - config Config + config Config } func init() { @@ -31,18 +34,14 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // load, seconds since last login and a list of tags as specified by config. -func NewLoadavgCollector(config Config, registry prometheus.Registry) (Collector, error) { +func NewLoadavgCollector(config Config) (Collector, error) { c := loadavgCollector{ - config: config, - registry: registry, + config: config, } - registry.Register( - "node_load1", - "1m load average", - prometheus.NilLabels, - load1, - ) + if _, err := prometheus.RegisterOrGet(load1); err != nil { + return nil, err + } return &c, nil } @@ -53,7 +52,7 @@ func (c *loadavgCollector) Update() (updates int, err error) { } updates++ glog.V(1).Infof("Set node_load: %f", load) - load1.Set(nil, load) + load1.Set(load) return updates, err } diff --git a/collector/meminfo.go b/collector/meminfo.go index 180cfd7f..64d4ae06 100644 --- a/collector/meminfo.go +++ b/collector/meminfo.go @@ -16,7 +16,8 @@ import ( ) const ( - procMemInfo = "/proc/meminfo" + procMemInfo = "/proc/meminfo" + memInfoSubsystem = "memory" ) var ( @@ -24,8 +25,7 @@ var ( ) type meminfoCollector struct { - registry prometheus.Registry - config Config + config Config } func init() { @@ -34,10 +34,9 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // memory stats. -func NewMeminfoCollector(config Config, registry prometheus.Registry) (Collector, error) { +func NewMeminfoCollector(config Config) (Collector, error) { c := meminfoCollector{ - config: config, - registry: registry, + config: config, } return &c, nil } @@ -50,16 +49,16 @@ func (c *meminfoCollector) Update() (updates int, err error) { glog.V(1).Infof("Set node_mem: %#v", memInfo) for k, v := range memInfo { if _, ok := memInfoMetrics[k]; !ok { - memInfoMetrics[k] = prometheus.NewGauge() - c.registry.Register( - "node_memory_"+k, - k+" from /proc/meminfo", - prometheus.NilLabels, - memInfoMetrics[k], - ) + gauge := prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: memInfoSubsystem, + Name: k, + Help: k + " from /proc/meminfo.", + }) + memInfoMetrics[k] = prometheus.MustRegisterOrGet(gauge).(prometheus.Gauge) } updates++ - memInfoMetrics[k].Set(nil, v) + memInfoMetrics[k].Set(v) } return updates, err } diff --git a/collector/netdev.go b/collector/netdev.go index 4824fbdf..cff06888 100644 --- a/collector/netdev.go +++ b/collector/netdev.go @@ -14,16 +14,16 @@ import ( ) const ( - procNetDev = "/proc/net/dev" + procNetDev = "/proc/net/dev" + netStatsSubsystem = "network" ) var ( - netStatsMetrics = map[string]prometheus.Gauge{} + netStatsMetrics = map[string]*prometheus.GaugeVec{} ) type netDevCollector struct { - registry prometheus.Registry - config Config + config Config } func init() { @@ -32,10 +32,9 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // network device stats. -func NewNetDevCollector(config Config, registry prometheus.Registry) (Collector, error) { +func NewNetDevCollector(config Config) (Collector, error) { c := netDevCollector{ - config: config, - registry: registry, + config: config, } return &c, nil } @@ -50,20 +49,23 @@ func (c *netDevCollector) Update() (updates int, err error) { for t, value := range stats { key := direction + "_" + t if _, ok := netStatsMetrics[key]; !ok { - netStatsMetrics[key] = prometheus.NewGauge() - c.registry.Register( - "node_network_"+key, - t+" "+direction+" from /proc/net/dev", - prometheus.NilLabels, - netStatsMetrics[key], + gv := prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: netStatsSubsystem, + Name: key, + Help: fmt.Sprintf("%s %s from /proc/net/dev.", t, direction), + }, + []string{"device"}, ) + netStatsMetrics[key] = prometheus.MustRegisterOrGet(gv).(*prometheus.GaugeVec) } updates++ v, err := strconv.ParseFloat(value, 64) if err != nil { return updates, fmt.Errorf("Invalid value %s in netstats: %s", value, err) } - netStatsMetrics[key].Set(map[string]string{"device": dev}, v) + netStatsMetrics[key].WithLabelValues(dev).Set(v) } } } diff --git a/collector/runit_collector.go b/collector/runit_collector.go index 6e7c2096..e7375496 100644 --- a/collector/runit_collector.go +++ b/collector/runit_collector.go @@ -8,45 +8,57 @@ import ( "github.com/soundcloud/go-runit/runit" ) +var ( + runitLabelNames = []string{"service"} + + runitState = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "service_state", + Help: "node_exporter: state of runit service.", + }, + runitLabelNames, + ) + runitStateDesired = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "service_desired_state", + Help: "node_exporter: desired state of runit service.", + }, + runitLabelNames, + ) + runitStateNormal = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "service_normal_state", + Help: "node_exporter: normal state of runit service.", + }, + runitLabelNames, + ) +) + type runitCollector struct { - config Config - state prometheus.Gauge - stateDesired prometheus.Gauge - stateNormal prometheus.Gauge + config Config } func init() { Factories["runit"] = NewRunitCollector } -func NewRunitCollector(config Config, registry prometheus.Registry) (Collector, error) { +func NewRunitCollector(config Config) (Collector, error) { c := runitCollector{ - config: config, - state: prometheus.NewGauge(), - stateDesired: prometheus.NewGauge(), - stateNormal: prometheus.NewGauge(), + config: config, } - registry.Register( - "node_service_state", - "node_exporter: state of runit service.", - prometheus.NilLabels, - c.state, - ) - - registry.Register( - "node_service_desired_state", - "node_exporter: desired state of runit service.", - prometheus.NilLabels, - c.stateDesired, - ) - - registry.Register( - "node_service_normal_state", - "node_exporter: normal state of runit service.", - prometheus.NilLabels, - c.stateNormal, - ) + if _, err := prometheus.RegisterOrGet(runitState); err != nil { + return nil, err + } + if _, err := prometheus.RegisterOrGet(runitStateDesired); err != nil { + return nil, err + } + if _, err := prometheus.RegisterOrGet(runitStateNormal); err != nil { + return nil, err + } return &c, nil } @@ -65,16 +77,12 @@ func (c *runitCollector) Update() (updates int, err error) { } glog.V(1).Infof("%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration) - labels := map[string]string{ - "service": service.Name, - } - - c.state.Set(labels, float64(status.State)) - c.stateDesired.Set(labels, float64(status.Want)) + runitState.WithLabelValues(service.Name).Set(float64(status.State)) + runitStateDesired.WithLabelValues(service.Name).Set(float64(status.Want)) if status.NormallyUp { - c.stateNormal.Set(labels, 1) + runitStateNormal.WithLabelValues(service.Name).Set(1) } else { - c.stateNormal.Set(labels, 1) + runitStateNormal.WithLabelValues(service.Name).Set(1) } updates += 3 } diff --git a/collector/stat.go b/collector/stat.go index 2b5e3531..2d853cdf 100644 --- a/collector/stat.go +++ b/collector/stat.go @@ -19,18 +19,48 @@ const ( ) var ( - cpuMetrics = prometheus.NewCounter() - intrMetric = prometheus.NewCounter() - ctxtMetric = prometheus.NewCounter() - btimeMetric = prometheus.NewGauge() - forksMetric = prometheus.NewCounter() - procsRunningMetric = prometheus.NewGauge() - procsBlockedMetric = prometheus.NewGauge() + cpuMetrics = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: Namespace, + Name: "cpu", + Help: "Seconds the cpus spent in each mode.", + }, + []string{"cpu", "mode"}, + ) + intrMetric = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: Namespace, + Name: "intr", + Help: "Total number of interrupts serviced.", + }) + ctxtMetric = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: Namespace, + Name: "context_switches", + Help: "Total number of context switches.", + }) + forksMetric = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: Namespace, + Name: "forks", + Help: "Total number of forks.", + }) + btimeMetric = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "boot_time", + Help: "Node boot time, in unixtime.", + }) + procsRunningMetric = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "procs_running", + Help: "Number of processes in runnable state.", + }) + procsBlockedMetric = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: Namespace, + Name: "procs_blocked", + Help: "Number of processes blocked waiting for I/O to complete.", + }) ) type statCollector struct { - registry prometheus.Registry - config Config + config Config } func init() { @@ -39,53 +69,31 @@ func init() { // Takes a config struct and prometheus registry and returns a new Collector exposing // network device stats. -func NewStatCollector(config Config, registry prometheus.Registry) (Collector, error) { +func NewStatCollector(config Config) (Collector, error) { c := statCollector{ - config: config, - registry: registry, + config: config, + } + if _, err := prometheus.RegisterOrGet(cpuMetrics); err != nil { + return nil, err + } + if _, err := prometheus.RegisterOrGet(intrMetric); err != nil { + return nil, err + } + if _, err := prometheus.RegisterOrGet(ctxtMetric); err != nil { + return nil, err + } + if _, err := prometheus.RegisterOrGet(forksMetric); err != nil { + return nil, err + } + if _, err := prometheus.RegisterOrGet(btimeMetric); err != nil { + return nil, err + } + if _, err := prometheus.RegisterOrGet(procsRunningMetric); err != nil { + return nil, err + } + if _, err := prometheus.RegisterOrGet(procsBlockedMetric); err != nil { + return nil, err } - registry.Register( - "node_cpu", - "Seconds the cpus spent in each mode.", - prometheus.NilLabels, - cpuMetrics, - ) - registry.Register( - "node_intr", - "Total number of interrupts serviced", - prometheus.NilLabels, - intrMetric, - ) - registry.Register( - "node_context_switches", - "Total number of context switches.", - prometheus.NilLabels, - ctxtMetric, - ) - registry.Register( - "node_forks", - "Total number of forks.", - prometheus.NilLabels, - forksMetric, - ) - registry.Register( - "node_boot_time", - "Node boot time, in unixtime.", - prometheus.NilLabels, - btimeMetric, - ) - registry.Register( - "node_procs_running", - "Number of processes in runnable state.", - prometheus.NilLabels, - procsRunningMetric, - ) - registry.Register( - "node_procs_blocked", - "Number of processes blocked waiting for I/O to complete.", - prometheus.NilLabels, - procsBlockedMetric, - ) return &c, nil } @@ -113,9 +121,9 @@ func (c *statCollector) Update() (updates int, err error) { if err != nil { return updates, err } - // Convert from ticks to seconds + // Convert from ticks to seconds value /= float64(C.sysconf(C._SC_CLK_TCK)) - cpuMetrics.Set(map[string]string{"cpu": parts[0], "mode": cpuFields[i]}, value) + cpuMetrics.With(prometheus.Labels{"cpu": parts[0], "mode": cpuFields[i]}).Set(value) } case parts[0] == "intr": // Only expose the overall number, use the 'interrupts' collector for more detail. @@ -123,37 +131,37 @@ func (c *statCollector) Update() (updates int, err error) { if err != nil { return updates, err } - intrMetric.Set(prometheus.NilLabels, value) + intrMetric.Set(value) case parts[0] == "ctxt": value, err := strconv.ParseFloat(parts[1], 64) if err != nil { return updates, err } - ctxtMetric.Set(prometheus.NilLabels, value) + ctxtMetric.Set(value) case parts[0] == "processes": value, err := strconv.ParseFloat(parts[1], 64) if err != nil { return updates, err } - forksMetric.Set(prometheus.NilLabels, value) + forksMetric.Set(value) case parts[0] == "btime": value, err := strconv.ParseFloat(parts[1], 64) if err != nil { return updates, err } - btimeMetric.Set(prometheus.NilLabels, value) + btimeMetric.Set(value) case parts[0] == "procs_running": value, err := strconv.ParseFloat(parts[1], 64) if err != nil { return updates, err } - procsRunningMetric.Set(prometheus.NilLabels, value) + procsRunningMetric.Set(value) case parts[0] == "procs_blocked": value, err := strconv.ParseFloat(parts[1], 64) if err != nil { return updates, err } - procsBlockedMetric.Set(prometheus.NilLabels, value) + procsBlockedMetric.Set(value) } } return updates, err diff --git a/node_exporter.conf b/node_exporter.conf index 6800bac1..8ce3c679 100644 --- a/node_exporter.conf +++ b/node_exporter.conf @@ -1,6 +1,6 @@ { "attributes" : { - "web-server" : "1", + "web_server" : "1", "zone" : "a", "default" : "1" } diff --git a/node_exporter.go b/node_exporter.go index ded1a4dc..7622cc61 100644 --- a/node_exporter.go +++ b/node_exporter.go @@ -17,10 +17,11 @@ import ( "github.com/golang/glog" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/exp" "github.com/prometheus/node_exporter/collector" ) +const subsystem = "exporter" + var ( configFile = flag.String("config", "node_exporter.conf", "config file.") memProfile = flag.String("memprofile", "", "write memory profile to this file") @@ -28,8 +29,27 @@ var ( enabledCollectors = flag.String("enabledCollectors", "attributes,diskstats,filesystem,loadavg,meminfo,stat,netdev", "comma-seperated list of collectors to use") printCollectors = flag.Bool("printCollectors", false, "If true, print available collectors and exit") interval = flag.Duration("interval", 60*time.Second, "refresh interval") - scrapeDurations = prometheus.NewDefaultHistogram() - metricsUpdated = prometheus.NewGauge() + + collectorLabelNames = []string{"collector", "result"} + + scrapeDurations = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Namespace: collector.Namespace, + Subsystem: subsystem, + Name: "scrape_duration_seconds", + Help: "node_exporter: Duration of a scrape job.", + }, + collectorLabelNames, + ) + metricsUpdated = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: collector.Namespace, + Subsystem: subsystem, + Name: "metrics_updated", + Help: "node_exporter: Number of metrics updated.", + }, + collectorLabelNames, + ) ) func main() { @@ -41,14 +61,13 @@ func main() { } return } - registry := prometheus.NewRegistry() - collectors, err := loadCollectors(*configFile, registry) + collectors, err := loadCollectors(*configFile) if err != nil { log.Fatalf("Couldn't load config and collectors: %s", err) } - registry.Register("node_exporter_scrape_duration_seconds", "node_exporter: Duration of a scrape job.", prometheus.NilLabels, scrapeDurations) - registry.Register("node_exporter_metrics_updated", "node_exporter: Number of metrics updated.", prometheus.NilLabels, metricsUpdated) + prometheus.MustRegister(scrapeDurations) + prometheus.MustRegister(metricsUpdated) glog.Infof("Enabled collectors:") for n, _ := range collectors { @@ -60,7 +79,7 @@ func main() { signal.Notify(sigHup, syscall.SIGHUP) signal.Notify(sigUsr1, syscall.SIGUSR1) - go serveStatus(registry) + go serveStatus() glog.Infof("Starting initial collection") collect(collectors) @@ -69,7 +88,7 @@ func main() { for { select { case <-sigHup: - collectors, err = loadCollectors(*configFile, registry) + collectors, err = loadCollectors(*configFile) if err != nil { log.Fatalf("Couldn't load config and collectors: %s", err) } @@ -96,7 +115,7 @@ func main() { } -func loadCollectors(file string, registry prometheus.Registry) (map[string]collector.Collector, error) { +func loadCollectors(file string) (map[string]collector.Collector, error) { collectors := map[string]collector.Collector{} config, err := getConfig(file) if err != nil { @@ -107,7 +126,7 @@ func loadCollectors(file string, registry prometheus.Registry) (map[string]colle if !ok { log.Fatalf("Collector '%s' not available", name) } - c, err := fn(*config, registry) + c, err := fn(*config) if err != nil { return nil, err } @@ -126,9 +145,9 @@ func getConfig(file string) (*collector.Config, error) { return config, json.Unmarshal(bytes, &config) } -func serveStatus(registry prometheus.Registry) { - exp.Handle(prometheus.ExpositionResource, registry.Handler()) - http.ListenAndServe(*listeningAddress, exp.DefaultCoarseMux) +func serveStatus() { + http.Handle("/metrics", prometheus.Handler()) + http.ListenAndServe(*listeningAddress, nil) } func collect(collectors map[string]collector.Collector) { @@ -147,17 +166,15 @@ func Execute(name string, c collector.Collector) { begin := time.Now() updates, err := c.Update() duration := time.Since(begin) + var result string - label := map[string]string{ - "collector": name, - } if err != nil { glog.Infof("ERROR: %s failed after %fs: %s", name, duration.Seconds(), err) - label["result"] = "error" + result = "error" } else { glog.Infof("OK: %s success after %fs.", name, duration.Seconds()) - label["result"] = "success" + result = "success" } - scrapeDurations.Add(label, duration.Seconds()) - metricsUpdated.Set(label, float64(updates)) + scrapeDurations.WithLabelValues(name, result).Observe(duration.Seconds()) + metricsUpdated.WithLabelValues(name, result).Set(float64(updates)) }