Split native collector into its component parts and make them enablable.

Last login is disabled by default as it's broken on ubuntu 12.04
Interrupts is disabled by default as it's very granular and we'll have total interrupts from /proc/stat

Allow ignoring devices from diskstats, ignore ram and loop devices by default.

Use glog for logging.
This commit is contained in:
Brian Brazil 2014-06-04 12:12:34 +01:00
parent 964cdbfcc9
commit 25ea90369c
14 changed files with 747 additions and 516 deletions

View file

@ -1,25 +1,26 @@
# node_exporter
Prometheus exporter with plugable metric collectors.
Prometheus exporter with pluggable metric collectors.
## Available collectors
By default it will only include the NativeCollector.
By default the build will only include the native collectors
that expose information from /proc.
To include other collectors, specify the build tags like this:
go build -tags 'ganglia runit' node_exporter.go
Which collectors are used is controlled by the --enabledCollectors flag.
### NativeCollector
Provides metrics for load, seconds since last login and a list of tags
read from `node_exporter.conf`.
To disable the native collector, use build tag `nonative`.
### GmondCollector (tag: ganglia)

43
collector/attributes.go Normal file
View file

@ -0,0 +1,43 @@
// +build !noattributes
package collector
import (
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
)
// attributes is the gauge registered as node_attributes; Update sets it to 1
// with the configured attributes as labels.
var (
attributes = prometheus.NewGauge()
)
// attributesCollector keeps the registry and config it was constructed with.
type attributesCollector struct {
registry prometheus.Registry
config Config
}
// init makes this collector available in Factories under the name "attributes".
func init() {
Factories["attributes"] = NewAttributesCollector
}
// NewAttributesCollector registers the node_attributes gauge with the given
// registry and returns a Collector that exposes the labels from the config.
// The returned error is always nil.
func NewAttributesCollector(config Config, registry prometheus.Registry) (Collector, error) {
	registry.Register("node_attributes", "node_exporter attributes", prometheus.NilLabels, attributes)
	return &attributesCollector{registry: registry, config: config}, nil
}
// Update sets the node_attributes gauge to 1, labelled with the attributes
// from the collector's config. It does not increment the update counter and
// always returns a nil error.
func (c *attributesCollector) Update() (updates int, err error) {
	// Infof, not Info: the message contains a format verb that must be expanded.
	glog.V(1).Infof("Set node_attributes{%v}: 1", c.config.Attributes)
	attributes.Set(c.config.Attributes, 1)
	return updates, err
}

View file

@ -5,15 +5,12 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
var Factories []func(Config, prometheus.Registry) (Collector, error)
var Factories = make(map[string]func(Config, prometheus.Registry) (Collector, error))
// Interface a collector has to implement.
type Collector interface {
// Get new metrics and expose them via prometheus registry.
Update() (n int, err error)
// Returns the name of the collector.
Name() string
}
type Config struct {

131
collector/diskstats.go Normal file
View file

@ -0,0 +1,131 @@
// +build !nonative
package collector
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"regexp"
"strconv"
"strings"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
)
// procDiskStats is the file the disk statistics are read from.
const (
procDiskStats = "/proc/diskstats"
)
// diskStat describes one exported disk metric: the suffix appended to
// "node_disk_" on registration, the metric object, and its help text.
type diskStat struct {
name string
metric prometheus.Metric
documentation string
}
var (
// ignoredDevices is the flag holding the regexp of device names that Update skips.
ignoredDevices = flag.String("diskstatsIgnoredDevices", "^(ram|loop)\\d+$", "Regexp of devices to ignore for diskstats.")
// Docs from https://www.kernel.org/doc/Documentation/iostats.txt
// The order matters: parseDiskStats keys each value by its position after
// the device name, and Update uses that position as an index into this slice.
diskStatsMetrics = []diskStat{
{"reads_completed", prometheus.NewCounter(), "The total number of reads completed successfully."},
{"reads_merged", prometheus.NewCounter(), "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt"},
{"sectors_read", prometheus.NewCounter(), "The total number of sectors read successfully."},
{"read_time_ms", prometheus.NewCounter(), "the total number of milliseconds spent by all reads."},
{"writes_completed", prometheus.NewCounter(), "The total number of writes completed successfully."},
{"writes_merged", prometheus.NewCounter(), "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt"},
{"sectors_written", prometheus.NewCounter(), "The total number of sectors written successfully."},
{"write_time_ms", prometheus.NewCounter(), "This is the total number of milliseconds spent by all writes."},
{"io_now", prometheus.NewGauge(), "The number of I/Os currently in progress."},
{"io_time_ms", prometheus.NewCounter(), "Milliseconds spent doing I/Os."},
{"io_time_weighted", prometheus.NewCounter(), "The weighted # of milliseconds spent doing I/Os. See https://www.kernel.org/doc/Documentation/iostats.txt"},
}
)
// diskstatsCollector keeps the registry and config it was constructed with,
// plus the compiled form of the ignoredDevices flag.
type diskstatsCollector struct {
registry prometheus.Registry
config Config
ignoredDevicesPattern *regexp.Regexp
}
// init makes this collector available in Factories under the name "diskstats".
func init() {
Factories["diskstats"] = NewDiskstatsCollector
}
// NewDiskstatsCollector takes a config struct and prometheus registry and
// returns a new Collector exposing disk device stats.
//
// The diskstatsIgnoredDevices flag is compiled here. Because the pattern is
// user supplied, an invalid regexp is reported through the error return
// instead of panicking; in that case nothing is registered.
func NewDiskstatsCollector(config Config, registry prometheus.Registry) (Collector, error) {
	pattern, err := regexp.Compile(*ignoredDevices)
	if err != nil {
		return nil, fmt.Errorf("Invalid diskstatsIgnoredDevices regexp %q: %s", *ignoredDevices, err)
	}
	c := diskstatsCollector{
		config:                config,
		registry:              registry,
		ignoredDevicesPattern: pattern,
	}
	// One metric per column of /proc/diskstats, named node_disk_<name>.
	for _, v := range diskStatsMetrics {
		registry.Register(
			"node_disk_"+v.name,
			v.documentation,
			prometheus.NilLabels,
			v.metric,
		)
	}
	return &c, nil
}
// Update fetches the current disk statistics via getDiskStats and sets one
// metric per device and column, labelled with the device name. Devices whose
// name matches the ignore pattern are skipped. It returns the number of
// values processed, and an error if the stats cannot be read or a value is
// not a valid float.
func (c *diskstatsCollector) Update() (updates int, err error) {
	perDevice, err := getDiskStats()
	if err != nil {
		return updates, fmt.Errorf("Couldn't get diskstats: %s", err)
	}
	for device, columns := range perDevice {
		if c.ignoredDevicesPattern.MatchString(device) {
			glog.V(1).Infof("Ignoring device: %s", device)
			continue
		}
		for idx, raw := range columns {
			updates++
			parsed, convErr := strconv.ParseFloat(raw, 64)
			if convErr != nil {
				return updates, fmt.Errorf("Invalid value %s in diskstats: %s", raw, convErr)
			}
			labels := map[string]string{"device": device}
			metric := diskStatsMetrics[idx].metric
			// Counters are tried first; anything else must be a gauge.
			if counter, isCounter := metric.(prometheus.Counter); isCounter {
				counter.Set(labels, parsed)
				continue
			}
			metric.(prometheus.Gauge).Set(labels, parsed)
		}
	}
	return updates, err
}
// getDiskStats opens procDiskStats and returns whatever parseDiskStats makes
// of it. An error from opening the file is returned unchanged.
func getDiskStats() (map[string]map[int]string, error) {
	f, openErr := os.Open(procDiskStats)
	if openErr != nil {
		return nil, openErr
	}
	return parseDiskStats(f)
}
// parseDiskStats parses /proc/diskstats formatted data from r and closes r.
//
// The result maps each device name to its statistics, keyed by their position
// after the device name (0-based, matching the order of diskStatsMetrics).
// Values are kept as the raw strings from the input.
//
// Every line must hold major, minor and device name followed by at least
// len(diskStatsMetrics) values. Newer kernels append further columns (discard
// and flush statistics); those extra columns are ignored rather than treated
// as an error. A read error from r is returned instead of a silently
// truncated result.
func parseDiskStats(r io.ReadCloser) (map[string]map[int]string, error) {
	defer r.Close()
	const prefixFields = 3 // major, minor and device name
	wanted := len(diskStatsMetrics)
	diskStats := map[string]map[int]string{}
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := scanner.Text()
		parts := strings.Fields(line)
		if len(parts) < prefixFields+wanted {
			return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, line)
		}
		dev := parts[2]
		stats := make(map[int]string, wanted)
		// Only the columns we have metrics for; Update indexes
		// diskStatsMetrics with these keys.
		for i, v := range parts[prefixFields : prefixFields+wanted] {
			stats[i] = v
		}
		diskStats[dev] = stats
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("Couldn't read %s: %s", procDiskStats, err)
	}
	return diskStats, nil
}

View file

@ -11,6 +11,7 @@ import (
"regexp"
"time"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/node_exporter/collector/ganglia"
)
@ -23,14 +24,13 @@ const (
)
type gmondCollector struct {
name string
Metrics map[string]prometheus.Gauge
config Config
registry prometheus.Registry
}
func init() {
Factories = append(Factories, NewGmondCollector)
Factories["gmond"] = NewGmondCollector
}
var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
@ -38,7 +38,6 @@ var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
// Takes a config struct and prometheus registry and returns a new Collector scraping ganglia.
func NewGmondCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := gmondCollector{
name: "gmond_collector",
config: config,
Metrics: make(map[string]prometheus.Gauge),
registry: registry,
@ -47,8 +46,6 @@ func NewGmondCollector(config Config, registry prometheus.Registry) (Collector,
return &c, nil
}
func (c *gmondCollector) Name() string { return c.name }
func (c *gmondCollector) setMetric(name string, labels map[string]string, metric ganglia.Metric) {
if _, ok := c.Metrics[name]; !ok {
var desc string
@ -64,18 +61,18 @@ func (c *gmondCollector) setMetric(name string, labels map[string]string, metric
break
}
}
debug(c.Name(), "Register %s: %s", name, desc)
glog.V(1).Infof("Register %s: %s", name, desc)
gauge := prometheus.NewGauge()
c.Metrics[name] = gauge
c.registry.Register(name, desc, prometheus.NilLabels, gauge) // one gauge per metric!
}
debug(c.Name(), "Set %s{%s}: %f", name, labels, metric.Value)
glog.V(1).Infof("Set %s{%s}: %f", name, labels, metric.Value)
c.Metrics[name].Set(labels, metric.Value)
}
func (c *gmondCollector) Update() (updates int, err error) {
conn, err := net.Dial(gangliaProto, gangliaAddress)
debug(c.Name(), "gmondCollector Update")
glog.V(1).Infof("gmondCollector Update")
if err != nil {
return updates, fmt.Errorf("Can't connect to gmond: %s", err)
}

View file

@ -1,22 +1,11 @@
package collector
import (
"flag"
"fmt"
"log"
"strconv"
"strings"
)
var verbose = flag.Bool("verbose", false, "Verbose output.")
func debug(name string, format string, a ...interface{}) {
if *verbose {
f := fmt.Sprintf("%s: %s", name, format)
log.Printf(f, a...)
}
}
func splitToInts(str string, sep string) (ints []int, err error) {
for _, part := range strings.Split(str, sep) {
i, err := strconv.Atoi(part)

116
collector/interrupts.go Normal file
View file

@ -0,0 +1,116 @@
// +build !nointerrupts
package collector
import (
"bufio"
"fmt"
"io"
"os"
"strconv"
"strings"
"github.com/prometheus/client_golang/prometheus"
)
// procInterrupts is the file the interrupt counters are read from.
const (
procInterrupts = "/proc/interrupts"
)
// interruptsMetric is the counter registered as node_interrupts; Update sets
// one labelled value per interrupt and CPU.
var (
interruptsMetric = prometheus.NewCounter()
)
// interruptsCollector keeps the registry and config it was constructed with.
type interruptsCollector struct {
registry prometheus.Registry
config Config
}
// init makes this collector available in Factories under the name "interrupts".
func init() {
Factories["interrupts"] = NewInterruptsCollector
}
// NewInterruptsCollector registers the node_interrupts counter with the given
// registry and returns a Collector exposing interrupt stats. The returned
// error is always nil.
func NewInterruptsCollector(config Config, registry prometheus.Registry) (Collector, error) {
	registry.Register("node_interrupts", "Interrupt details from /proc/interrupts", prometheus.NilLabels, interruptsMetric)
	return &interruptsCollector{registry: registry, config: config}, nil
}
// Update fetches the interrupt table via getInterrupts and sets the
// node_interrupts counter once per interrupt and CPU column, labelled with
// the CPU index, interrupt name, info and devices. It returns the number of
// values processed, and an error if the table cannot be read or a value is
// not a valid float.
func (c *interruptsCollector) Update() (updates int, err error) {
	table, err := getInterrupts()
	if err != nil {
		return updates, fmt.Errorf("Couldn't get interrupts: %s", err)
	}
	for irqName, irq := range table {
		for cpu, raw := range irq.values {
			updates++
			count, convErr := strconv.ParseFloat(raw, 64)
			if convErr != nil {
				return updates, fmt.Errorf("Invalid value %s in interrupts: %s", raw, convErr)
			}
			interruptsMetric.Set(map[string]string{
				"CPU":     strconv.Itoa(cpu),
				"type":    irqName,
				"info":    irq.info,
				"devices": irq.devices,
			}, count)
		}
	}
	return updates, err
}
// interrupt is one parsed line of /proc/interrupts.
type interrupt struct {
// info is the first column after the counters for numbered interrupts,
// and the whole trailing description for named ones.
info string
// devices holds the remaining trailing columns of a numbered interrupt,
// joined by spaces; it stays empty for named interrupts.
devices string
// values are the counter columns as raw strings, in column order.
values []string
}
// getInterrupts opens procInterrupts and returns whatever parseInterrupts
// makes of it. An error from opening the file is returned unchanged.
func getInterrupts() (map[string]interrupt, error) {
	f, openErr := os.Open(procInterrupts)
	if openErr != nil {
		return nil, openErr
	}
	return parseInterrupts(f)
}
// parseInterrupts parses /proc/interrupts formatted data from r and closes r.
//
// The first line is the header with one column per CPU. Every following line
// is "<name>: <one counter per CPU> <description...>". The result maps the
// interrupt name (without the trailing colon) to its counters and
// description. For numbered interrupts the first description column becomes
// info and the rest devices; for named interrupts the whole description
// becomes info. Lines with fewer columns than name + counters + description
// (such as ERR and MIS) are skipped.
//
// An empty input and read errors from r are reported as errors.
func parseInterrupts(r io.ReadCloser) (map[string]interrupt, error) {
	defer r.Close()
	interrupts := map[string]interrupt{}
	scanner := bufio.NewScanner(r)
	if !scanner.Scan() {
		if err := scanner.Err(); err != nil {
			return nil, fmt.Errorf("Couldn't read %s: %s", procInterrupts, err)
		}
		return nil, fmt.Errorf("%s empty", procInterrupts)
	}
	cpuNum := len(strings.Fields(scanner.Text())) // one header per cpu
	for scanner.Scan() {
		parts := strings.Fields(scanner.Text())
		if len(parts) < cpuNum+2 { // irq + one column per cpu + details,
			continue // we ignore ERR and MIS for now
		}
		intName := strings.TrimSuffix(parts[0], ":")
		intr := interrupt{
			// parts[0] is the name, so the counters occupy indexes
			// 1..cpuNum inclusive.
			values: parts[1 : cpuNum+1],
		}
		if _, err := strconv.Atoi(intName); err == nil { // numeral interrupt
			intr.info = parts[cpuNum+1]
			intr.devices = strings.Join(parts[cpuNum+2:], " ")
		} else {
			intr.info = strings.Join(parts[cpuNum+1:], " ")
		}
		interrupts[intName] = intr
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("Couldn't read %s: %s", procInterrupts, err)
	}
	return interrupts, nil
}

105
collector/lastlogin.go Normal file
View file

@ -0,0 +1,105 @@
// +build !nolastLogin
package collector
import (
"bufio"
"fmt"
"io"
"os/exec"
"strings"
"time"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
)
// lastSeen is the gauge registered as node_last_login_time.
var (
lastSeen = prometheus.NewGauge()
)
// lastLoginCollector keeps the registry and config it was constructed with.
type lastLoginCollector struct {
registry prometheus.Registry
config Config
}
// init makes this collector available in Factories under the name "lastlogin".
func init() {
Factories["lastlogin"] = NewLastLoginCollector
}
// NewLastLoginCollector registers the node_last_login_time gauge with the
// given registry and returns a Collector exposing the time of the last login.
// The returned error is always nil.
func NewLastLoginCollector(config Config, registry prometheus.Registry) (Collector, error) {
	registry.Register("node_last_login_time", "The time of the last login.", prometheus.NilLabels, lastSeen)
	return &lastLoginCollector{registry: registry, config: config}, nil
}
// Update obtains the last login time from getLastLoginTime and stores it in
// the node_last_login_time gauge. It reports one update on success and
// wraps the error otherwise.
func (c *lastLoginCollector) Update() (updates int, err error) {
	loginTime, err := getLastLoginTime()
	if err != nil {
		return updates, fmt.Errorf("Couldn't get last seen: %s", err)
	}
	updates++
	glog.V(1).Infof("Set node_last_login_time: %f", loginTime)
	lastSeen.Set(nil, loginTime)
	return updates, err
}
// getLastLoginTime runs `who /var/log/wtmp -l -u -s` and returns the
// timestamp of the last line it prints, as seconds since the Unix epoch.
// The date and time columns are interpreted as UTC with minute resolution.
//
// Errors from starting the command, reading its output, malformed lines and
// a failing exit status are all returned. When parsing fails the child is
// killed and reaped so it does not linger blocked on its stdout pipe.
func getLastLoginTime() (float64, error) {
	who := exec.Command("who", "/var/log/wtmp", "-l", "-u", "-s")
	output, err := who.StdoutPipe()
	if err != nil {
		return 0, err
	}
	if err = who.Start(); err != nil {
		return 0, err
	}

	last, err := parseWhoOutput(output)
	if err != nil {
		// Best effort cleanup: the parse error is what the caller needs to
		// see, so failures to kill/reap are deliberately ignored.
		_ = who.Process.Kill()
		_ = who.Wait()
		return 0, err
	}

	if err = who.Wait(); err != nil {
		return 0, err
	}
	return float64(last.Unix()), nil
}

// parseWhoOutput reads `who` output from r line by line and returns the
// timestamp found on the last line (the zero time if there are no lines).
// Each line must have the date (YYYY-MM-DD) in its third column and the time
// (HH:MM) in its fourth.
func parseWhoOutput(r io.Reader) (time.Time, error) {
	var last time.Time
	reader := bufio.NewReader(r)
	for {
		line, isPrefix, err := reader.ReadLine()
		if err == io.EOF {
			return last, nil
		}
		if err != nil {
			// Any other read error would otherwise fall through with an
			// empty line and index out of range below.
			return time.Time{}, err
		}
		if isPrefix {
			return time.Time{}, fmt.Errorf("line to long: %s(...)", line)
		}

		fields := strings.Fields(string(line))
		if len(fields) < 4 {
			return time.Time{}, fmt.Errorf("Unexpected who output line '%s'", line)
		}

		dateParts, err := splitToInts(fields[2], "-") // 2013-04-16
		if err != nil {
			return time.Time{}, fmt.Errorf("Couldn't parse date in line '%s': %s", fields, err)
		}
		if len(dateParts) != 3 {
			return time.Time{}, fmt.Errorf("Couldn't parse date in line '%s': unexpected format", fields)
		}

		timeParts, err := splitToInts(fields[3], ":") // 11:33
		if err != nil {
			return time.Time{}, fmt.Errorf("Couldn't parse time in line '%s': %s", fields, err)
		}
		if len(timeParts) < 2 {
			return time.Time{}, fmt.Errorf("Couldn't parse time in line '%s': unexpected format", fields)
		}

		last = time.Date(dateParts[0], time.Month(dateParts[1]), dateParts[2], timeParts[0], timeParts[1], 0, 0, time.UTC)
	}
}

76
collector/loadavg.go Normal file
View file

@ -0,0 +1,76 @@
// +build !noloadavg
package collector
import (
"fmt"
"io/ioutil"
"strconv"
"strings"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
)
// procLoad is the file the load average is read from.
const (
procLoad = "/proc/loadavg"
)
// load1 is the gauge registered as node_load1.
var (
load1 = prometheus.NewGauge()
)
// loadavgCollector keeps the registry and config it was constructed with.
type loadavgCollector struct {
registry prometheus.Registry
config Config
}
// init makes this collector available in Factories under the name "loadavg".
func init() {
Factories["loadavg"] = NewLoadavgCollector
}
// NewLoadavgCollector registers the node_load1 gauge with the given registry
// and returns a Collector exposing the 1 minute load average. The returned
// error is always nil.
func NewLoadavgCollector(config Config, registry prometheus.Registry) (Collector, error) {
	registry.Register("node_load1", "1m load average", prometheus.NilLabels, load1)
	return &loadavgCollector{registry: registry, config: config}, nil
}
// Update obtains the current load from getLoad1 and stores it in the
// node_load1 gauge. It reports one update on success and wraps the error
// otherwise.
func (c *loadavgCollector) Update() (updates int, err error) {
	current, err := getLoad1()
	if err != nil {
		return updates, fmt.Errorf("Couldn't get load: %s", err)
	}
	updates++
	glog.V(1).Infof("Set node_load: %f", current)
	load1.Set(nil, current)
	return updates, err
}
// getLoad1 reads procLoad in full and returns what parseLoad extracts from
// it. An error from reading the file is returned unchanged.
func getLoad1() (float64, error) {
	raw, readErr := ioutil.ReadFile(procLoad)
	if readErr != nil {
		return 0, readErr
	}
	return parseLoad(string(raw))
}
// parseLoad returns the first whitespace-separated field of data parsed as a
// float. It returns an error when data contains no fields at all or when the
// first field is not a valid number.
func parseLoad(data string) (float64, error) {
	parts := strings.Fields(data)
	if len(parts) == 0 {
		// Guard against indexing into an empty slice on empty input.
		return 0, fmt.Errorf("Could not parse load: no fields in '%s'", data)
	}
	load, err := strconv.ParseFloat(parts[0], 64)
	if err != nil {
		return 0, fmt.Errorf("Could not parse load '%s': %s", parts[0], err)
	}
	return load, nil
}

101
collector/meminfo.go Normal file
View file

@ -0,0 +1,101 @@
// +build !nomeminfo
package collector
import (
"bufio"
"fmt"
"io"
"os"
"regexp"
"strconv"
"strings"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
)
// procMemInfo is the file the memory statistics are read from.
const (
procMemInfo = "/proc/meminfo"
)
// memInfoMetrics holds one gauge per meminfo key; Update creates and
// registers a gauge the first time it sees a key.
var (
memInfoMetrics = map[string]prometheus.Gauge{}
)
// meminfoCollector keeps the registry and config it was constructed with.
type meminfoCollector struct {
registry prometheus.Registry
config Config
}
// init makes this collector available in Factories under the name "meminfo".
func init() {
Factories["meminfo"] = NewMeminfoCollector
}
// NewMeminfoCollector returns a Collector exposing memory stats. Nothing is
// registered here; the returned error is always nil.
func NewMeminfoCollector(config Config, registry prometheus.Registry) (Collector, error) {
	return &meminfoCollector{registry: registry, config: config}, nil
}
// Update fetches the memory statistics via getMemInfo and sets one gauge per
// key, registering node_memory_<key> the first time a key is seen. It
// returns the number of values set, or a wrapped error if the statistics
// cannot be read.
func (c *meminfoCollector) Update() (updates int, err error) {
	values, err := getMemInfo()
	if err != nil {
		return updates, fmt.Errorf("Couldn't get meminfo: %s", err)
	}
	glog.V(1).Infof("Set node_mem: %#v", values)
	for name, value := range values {
		gauge, known := memInfoMetrics[name]
		if !known {
			gauge = prometheus.NewGauge()
			memInfoMetrics[name] = gauge
			c.registry.Register("node_memory_"+name, name+" from /proc/meminfo", prometheus.NilLabels, gauge)
		}
		updates++
		gauge.Set(nil, value)
	}
	return updates, err
}
// getMemInfo opens procMemInfo and returns whatever parseMemInfo makes of
// it. An error from opening the file is returned unchanged.
func getMemInfo() (map[string]float64, error) {
	f, openErr := os.Open(procMemInfo)
	if openErr != nil {
		return nil, openErr
	}
	return parseMemInfo(f)
}
// parseMemInfo parses /proc/meminfo formatted data from r and closes r.
//
// Every line must be "<key>: <value>" optionally followed by a unit. Values
// with a unit are presumed to be in kB and are multiplied by 1024. The
// trailing colon is removed from the key and a parenthesised part is folded
// into it, e.g. Active(anon) becomes Active_anon.
//
// Lines with any other number of fields, non-numeric values and read errors
// from r are reported as errors.
func parseMemInfo(r io.ReadCloser) (map[string]float64, error) {
	defer r.Close()
	memInfo := map[string]float64{}
	scanner := bufio.NewScanner(r)
	re := regexp.MustCompile("\\((.*)\\)")
	for scanner.Scan() {
		line := scanner.Text()
		parts := strings.Fields(line)
		// Validate the shape first so parts[1] below cannot be out of range.
		if len(parts) != 2 && len(parts) != 3 {
			return nil, fmt.Errorf("Invalid line in %s: %s", procMemInfo, line)
		}
		fv, err := strconv.ParseFloat(parts[1], 64)
		if err != nil {
			return nil, fmt.Errorf("Invalid value in meminfo: %s", err)
		}
		if len(parts) == 3 { // has unit, we presume kB
			fv *= 1024
		}
		key := strings.TrimSuffix(parts[0], ":")
		// Active(anon) -> Active_anon
		key = re.ReplaceAllString(key, "_${1}")
		memInfo[key] = fv
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("Couldn't read %s: %s", procMemInfo, err)
	}
	return memInfo, nil
}

View file

@ -1,454 +0,0 @@
// +build !nonative
package collector
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"regexp"
"strconv"
"strings"
"time"
"github.com/prometheus/client_golang/prometheus"
)
const (
procLoad = "/proc/loadavg"
procMemInfo = "/proc/meminfo"
procInterrupts = "/proc/interrupts"
procNetDev = "/proc/net/dev"
procDiskStats = "/proc/diskstats"
)
type diskStat struct {
name string
metric prometheus.Metric
documentation string
}
var (
// Docs from https://www.kernel.org/doc/Documentation/iostats.txt
diskStatsMetrics = []diskStat{
{"reads_completed", prometheus.NewCounter(), "The total number of reads completed successfully."},
{"reads_merged", prometheus.NewCounter(), "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt"},
{"sectors_read", prometheus.NewCounter(), "The total number of sectors read successfully."},
{"read_time_ms", prometheus.NewCounter(), "the total number of milliseconds spent by all reads."},
{"writes_completed", prometheus.NewCounter(), "The total number of writes completed successfully."},
{"writes_merged", prometheus.NewCounter(), "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt"},
{"sectors_written", prometheus.NewCounter(), "The total number of sectors written successfully."},
{"write_time_ms", prometheus.NewCounter(), "This is the total number of milliseconds spent by all writes."},
{"io_now", prometheus.NewGauge(), "The number of I/Os currently in progress."},
{"io_time_ms", prometheus.NewCounter(), "Milliseconds spent doing I/Os."},
{"io_time_weighted", prometheus.NewCounter(), "The weighted # of milliseconds spent doing I/Os. See https://www.kernel.org/doc/Documentation/iostats.txt"},
}
lastSeen = prometheus.NewGauge()
load1 = prometheus.NewGauge()
attributes = prometheus.NewGauge()
memInfoMetrics = map[string]prometheus.Gauge{}
netStatsMetrics = map[string]prometheus.Gauge{}
interruptsMetric = prometheus.NewCounter()
)
type nativeCollector struct {
registry prometheus.Registry
name string
config Config
}
func init() {
Factories = append(Factories, NewNativeCollector)
}
// Takes a config struct and prometheus registry and returns a new Collector exposing
// load, seconds since last login and a list of tags as specified by config.
func NewNativeCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := nativeCollector{
name: "native_collector",
config: config,
registry: registry,
}
registry.Register(
"node_load1",
"1m load average",
prometheus.NilLabels,
load1,
)
registry.Register(
"node_last_login_time",
"The time of the last login.",
prometheus.NilLabels,
lastSeen,
)
registry.Register(
"node_attributes",
"node_exporter attributes",
prometheus.NilLabels,
attributes,
)
registry.Register(
"node_interrupts",
"Interrupt details from /proc/interrupts",
prometheus.NilLabels,
interruptsMetric,
)
for _, v := range diskStatsMetrics {
registry.Register(
"node_disk_"+v.name,
v.documentation,
prometheus.NilLabels,
v.metric,
)
}
return &c, nil
}
func (c *nativeCollector) Name() string { return c.name }
func (c *nativeCollector) Update() (updates int, err error) {
last, err := getLastLoginTime()
if err != nil {
return updates, fmt.Errorf("Couldn't get last seen: %s", err)
}
updates++
debug(c.Name(), "Set node_last_login_time: %f", last)
lastSeen.Set(nil, last)
load, err := getLoad1()
if err != nil {
return updates, fmt.Errorf("Couldn't get load: %s", err)
}
updates++
debug(c.Name(), "Set node_load: %f", load)
load1.Set(nil, load)
debug(c.Name(), "Set node_attributes{%v}: 1", c.config.Attributes)
attributes.Set(c.config.Attributes, 1)
memInfo, err := getMemInfo()
if err != nil {
return updates, fmt.Errorf("Couldn't get meminfo: %s", err)
}
debug(c.Name(), "Set node_mem: %#v", memInfo)
for k, v := range memInfo {
if _, ok := memInfoMetrics[k]; !ok {
memInfoMetrics[k] = prometheus.NewGauge()
c.registry.Register(
"node_memory_"+k,
k+" from /proc/meminfo",
prometheus.NilLabels,
memInfoMetrics[k],
)
}
updates++
memInfoMetrics[k].Set(nil, v)
}
interrupts, err := getInterrupts()
if err != nil {
return updates, fmt.Errorf("Couldn't get interrupts: %s", err)
}
for name, interrupt := range interrupts {
for cpuNo, value := range interrupt.values {
updates++
fv, err := strconv.ParseFloat(value, 64)
if err != nil {
return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err)
}
labels := map[string]string{
"CPU": strconv.Itoa(cpuNo),
"type": name,
"info": interrupt.info,
"devices": interrupt.devices,
}
interruptsMetric.Set(labels, fv)
}
}
netStats, err := getNetStats()
if err != nil {
return updates, fmt.Errorf("Couldn't get netstats: %s", err)
}
for direction, devStats := range netStats {
for dev, stats := range devStats {
for t, value := range stats {
key := direction + "_" + t
if _, ok := netStatsMetrics[key]; !ok {
netStatsMetrics[key] = prometheus.NewGauge()
c.registry.Register(
"node_network_"+key,
t+" "+direction+" from /proc/net/dev",
prometheus.NilLabels,
netStatsMetrics[key],
)
}
updates++
v, err := strconv.ParseFloat(value, 64)
if err != nil {
return updates, fmt.Errorf("Invalid value %s in netstats: %s", value, err)
}
netStatsMetrics[key].Set(map[string]string{"device": dev}, v)
}
}
}
diskStats, err := getDiskStats()
if err != nil {
return updates, fmt.Errorf("Couldn't get diskstats: %s", err)
}
for dev, stats := range diskStats {
for k, value := range stats {
updates++
v, err := strconv.ParseFloat(value, 64)
if err != nil {
return updates, fmt.Errorf("Invalid value %s in diskstats: %s", value, err)
}
labels := map[string]string{"device": dev}
counter, ok := diskStatsMetrics[k].metric.(prometheus.Counter)
if ok {
counter.Set(labels, v)
} else {
var gauge = diskStatsMetrics[k].metric.(prometheus.Gauge)
gauge.Set(labels, v)
}
}
}
return updates, err
}
func getLoad1() (float64, error) {
data, err := ioutil.ReadFile(procLoad)
if err != nil {
return 0, err
}
return parseLoad(string(data))
}
func parseLoad(data string) (float64, error) {
parts := strings.Fields(data)
load, err := strconv.ParseFloat(parts[0], 64)
if err != nil {
return 0, fmt.Errorf("Could not parse load '%s': %s", parts[0], err)
}
return load, nil
}
func getLastLoginTime() (float64, error) {
who := exec.Command("who", "/var/log/wtmp", "-l", "-u", "-s")
output, err := who.StdoutPipe()
if err != nil {
return 0, err
}
err = who.Start()
if err != nil {
return 0, err
}
reader := bufio.NewReader(output)
var last time.Time
for {
line, isPrefix, err := reader.ReadLine()
if err == io.EOF {
break
}
if isPrefix {
return 0, fmt.Errorf("line to long: %s(...)", line)
}
fields := strings.Fields(string(line))
lastDate := fields[2]
lastTime := fields[3]
dateParts, err := splitToInts(lastDate, "-") // 2013-04-16
if err != nil {
return 0, fmt.Errorf("Couldn't parse date in line '%s': %s", fields, err)
}
timeParts, err := splitToInts(lastTime, ":") // 11:33
if err != nil {
return 0, fmt.Errorf("Couldn't parse time in line '%s': %s", fields, err)
}
last_t := time.Date(dateParts[0], time.Month(dateParts[1]), dateParts[2], timeParts[0], timeParts[1], 0, 0, time.UTC)
last = last_t
}
err = who.Wait()
if err != nil {
return 0, err
}
return float64(last.Unix()), nil
}
func getMemInfo() (map[string]float64, error) {
file, err := os.Open(procMemInfo)
if err != nil {
return nil, err
}
return parseMemInfo(file)
}
func parseMemInfo(r io.ReadCloser) (map[string]float64, error) {
defer r.Close()
memInfo := map[string]float64{}
scanner := bufio.NewScanner(r)
re := regexp.MustCompile("\\((.*)\\)")
for scanner.Scan() {
line := scanner.Text()
parts := strings.Fields(string(line))
fv, err := strconv.ParseFloat(parts[1], 64)
if err != nil {
return nil, fmt.Errorf("Invalid value in meminfo: %s", err)
}
switch len(parts) {
case 2: // no unit
case 3: // has unit, we presume kB
fv *= 1024
default:
return nil, fmt.Errorf("Invalid line in %s: %s", procMemInfo, line)
}
key := parts[0][:len(parts[0])-1] // remove trailing : from key
// Active(anon) -> Active_anon
key = re.ReplaceAllString(key, "_${1}")
memInfo[key] = fv
}
return memInfo, nil
}
type interrupt struct {
info string
devices string
values []string
}
func getInterrupts() (map[string]interrupt, error) {
file, err := os.Open(procInterrupts)
if err != nil {
return nil, err
}
return parseInterrupts(file)
}
func parseInterrupts(r io.ReadCloser) (map[string]interrupt, error) {
defer r.Close()
interrupts := map[string]interrupt{}
scanner := bufio.NewScanner(r)
if !scanner.Scan() {
return nil, fmt.Errorf("%s empty", procInterrupts)
}
cpuNum := len(strings.Fields(string(scanner.Text()))) // one header per cpu
for scanner.Scan() {
line := scanner.Text()
parts := strings.Fields(string(line))
if len(parts) < cpuNum+2 { // irq + one column per cpu + details,
continue // we ignore ERR and MIS for now
}
intName := parts[0][:len(parts[0])-1] // remove trailing :
intr := interrupt{
values: parts[1:cpuNum],
}
if _, err := strconv.Atoi(intName); err == nil { // numeral interrupt
intr.info = parts[cpuNum+1]
intr.devices = strings.Join(parts[cpuNum+2:], " ")
} else {
intr.info = strings.Join(parts[cpuNum+1:], " ")
}
interrupts[intName] = intr
}
return interrupts, nil
}
func getNetStats() (map[string]map[string]map[string]string, error) {
file, err := os.Open(procNetDev)
if err != nil {
return nil, err
}
return parseNetStats(file)
}
func parseNetStats(r io.ReadCloser) (map[string]map[string]map[string]string, error) {
defer r.Close()
netStats := map[string]map[string]map[string]string{}
netStats["transmit"] = map[string]map[string]string{}
netStats["receive"] = map[string]map[string]string{}
scanner := bufio.NewScanner(r)
scanner.Scan() // skip first header
scanner.Scan()
parts := strings.Split(string(scanner.Text()), "|")
if len(parts) != 3 { // interface + receive + transmit
return nil, fmt.Errorf("Invalid header line in %s: %s",
procNetDev, scanner.Text())
}
header := strings.Fields(parts[1])
for scanner.Scan() {
parts := strings.Fields(string(scanner.Text()))
if len(parts) != 2*len(header)+1 {
return nil, fmt.Errorf("Invalid line in %s: %s",
procNetDev, scanner.Text())
}
dev := parts[0][:len(parts[0])-1]
receive, err := parseNetDevLine(parts[1:len(header)+1], header)
if err != nil {
return nil, err
}
transmit, err := parseNetDevLine(parts[len(header)+1:], header)
if err != nil {
return nil, err
}
netStats["transmit"][dev] = transmit
netStats["receive"][dev] = receive
}
return netStats, nil
}
func parseNetDevLine(parts []string, header []string) (map[string]string, error) {
devStats := map[string]string{}
for i, v := range parts {
devStats[header[i]] = v
}
return devStats, nil
}
func getDiskStats() (map[string]map[int]string, error) {
file, err := os.Open(procDiskStats)
if err != nil {
return nil, err
}
return parseDiskStats(file)
}
func parseDiskStats(r io.ReadCloser) (map[string]map[int]string, error) {
defer r.Close()
diskStats := map[string]map[int]string{}
scanner := bufio.NewScanner(r)
for scanner.Scan() {
parts := strings.Fields(string(scanner.Text()))
if len(parts) != len(diskStatsMetrics)+3 { // we strip major, minor and dev
return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, scanner.Text())
}
dev := parts[2]
diskStats[dev] = map[int]string{}
for i, v := range parts[3:] {
diskStats[dev][i] = v
}
}
return diskStats, nil
}

125
collector/netdev.go Normal file
View file

@ -0,0 +1,125 @@
// +build !nonetDev
package collector
import (
"bufio"
"fmt"
"io"
"os"
"strconv"
"strings"
"github.com/prometheus/client_golang/prometheus"
)
// procNetDev is the file the network device statistics are read from.
const (
procNetDev = "/proc/net/dev"
)
// netStatsMetrics holds one gauge per "<direction>_<field>" key; Update
// creates and registers a gauge the first time it sees a key.
var (
netStatsMetrics = map[string]prometheus.Gauge{}
)
// netDevCollector keeps the registry and config it was constructed with.
type netDevCollector struct {
registry prometheus.Registry
config Config
}
// init makes this collector available in Factories under the name "netdev".
func init() {
Factories["netdev"] = NewNetDevCollector
}
// NewNetDevCollector returns a Collector exposing network device stats.
// Nothing is registered here; the returned error is always nil.
func NewNetDevCollector(config Config, registry prometheus.Registry) (Collector, error) {
	return &netDevCollector{registry: registry, config: config}, nil
}
// Update fetches the network statistics via getNetStats and sets one gauge
// per direction and field, labelled with the device name. The gauge
// node_network_<direction>_<field> is registered the first time the
// combination is seen. It returns the number of values processed, and an
// error if the statistics cannot be read or a value is not a valid float.
func (c *netDevCollector) Update() (updates int, err error) {
	byDirection, err := getNetStats()
	if err != nil {
		return updates, fmt.Errorf("Couldn't get netstats: %s", err)
	}
	for direction, devices := range byDirection {
		for device, counters := range devices {
			for field, raw := range counters {
				key := direction + "_" + field
				gauge, known := netStatsMetrics[key]
				if !known {
					gauge = prometheus.NewGauge()
					netStatsMetrics[key] = gauge
					c.registry.Register("node_network_"+key, field+" "+direction+" from /proc/net/dev", prometheus.NilLabels, gauge)
				}
				updates++
				parsed, convErr := strconv.ParseFloat(raw, 64)
				if convErr != nil {
					return updates, fmt.Errorf("Invalid value %s in netstats: %s", raw, convErr)
				}
				gauge.Set(map[string]string{"device": device}, parsed)
			}
		}
	}
	return updates, err
}
// getNetStats opens procNetDev and returns whatever parseNetStats makes of
// it. An error from opening the file is returned unchanged.
func getNetStats() (map[string]map[string]map[string]string, error) {
	f, openErr := os.Open(procNetDev)
	if openErr != nil {
		return nil, openErr
	}
	return parseNetStats(f)
}
// parseNetStats parses /proc/net/dev formatted data from r and closes r.
//
// The result is keyed by direction ("receive" or "transmit"), then device
// name, then field name, with the raw counter strings as values. Field names
// come from the second header line, which has the form
// "face |<receive fields>|<transmit fields>". The receive and transmit
// sections each use their own field names, since the two lists differ.
//
// The device name is everything before the last colon of a line, so a name
// fused to its first counter ("eth0:123") is handled as well. Lines without
// a colon or with an unexpected number of counters, a malformed header and
// read errors from r are reported as errors.
func parseNetStats(r io.ReadCloser) (map[string]map[string]map[string]string, error) {
	defer r.Close()
	netStats := map[string]map[string]map[string]string{
		"transmit": {},
		"receive":  {},
	}
	scanner := bufio.NewScanner(r)
	scanner.Scan() // skip first header
	scanner.Scan()
	parts := strings.Split(scanner.Text(), "|")
	if len(parts) != 3 { // interface + receive + transmit
		if err := scanner.Err(); err != nil {
			return nil, fmt.Errorf("Couldn't read %s: %s", procNetDev, err)
		}
		return nil, fmt.Errorf("Invalid header line in %s: %s",
			procNetDev, scanner.Text())
	}
	receiveHeader := strings.Fields(parts[1])
	transmitHeader := strings.Fields(parts[2])
	for scanner.Scan() {
		line := scanner.Text()
		// Counters never contain a colon, so the last one ends the name.
		sep := strings.LastIndex(line, ":")
		if sep < 0 {
			return nil, fmt.Errorf("Invalid line in %s: %s", procNetDev, line)
		}
		dev := strings.TrimSpace(line[:sep])
		values := strings.Fields(line[sep+1:])
		if dev == "" || len(values) != len(receiveHeader)+len(transmitHeader) {
			return nil, fmt.Errorf("Invalid line in %s: %s", procNetDev, line)
		}
		receive, err := parseNetDevLine(values[:len(receiveHeader)], receiveHeader)
		if err != nil {
			return nil, err
		}
		transmit, err := parseNetDevLine(values[len(receiveHeader):], transmitHeader)
		if err != nil {
			return nil, err
		}
		netStats["transmit"][dev] = transmit
		netStats["receive"][dev] = receive
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("Couldn't read %s: %s", procNetDev, err)
	}
	return netStats, nil
}
// parseNetDevLine pairs each value in parts with the field name at the same
// position in header and returns the result as a map. It returns an error
// when there are more values than field names, since the surplus values
// could not be named.
func parseNetDevLine(parts []string, header []string) (map[string]string, error) {
	if len(parts) > len(header) {
		return nil, fmt.Errorf("Invalid netdev line: %d values for %d header fields", len(parts), len(header))
	}
	devStats := make(map[string]string, len(parts))
	for i, v := range parts {
		devStats[header[i]] = v
	}
	return devStats, nil
}

View file

@ -3,12 +3,12 @@
package collector
import (
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
"github.com/soundcloud/go-runit/runit"
)
type runitCollector struct {
name string
config Config
state prometheus.Gauge
stateDesired prometheus.Gauge
@ -16,12 +16,11 @@ type runitCollector struct {
}
func init() {
Factories = append(Factories, NewRunitCollector)
Factories["runit"] = NewRunitCollector
}
func NewRunitCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := runitCollector{
name: "runit_collector",
config: config,
state: prometheus.NewGauge(),
stateDesired: prometheus.NewGauge(),
@ -52,8 +51,6 @@ func NewRunitCollector(config Config, registry prometheus.Registry) (Collector,
return &c, nil
}
// Name returns the collector name stored in c.name.
func (c *runitCollector) Name() string { return c.name }
func (c *runitCollector) Update() (updates int, err error) {
services, err := runit.GetServices("/etc/service")
if err != nil {
@ -63,11 +60,11 @@ func (c *runitCollector) Update() (updates int, err error) {
for _, service := range services {
status, err := service.Status()
if err != nil {
debug(c.Name(), "Couldn't get status for %s: %s, skipping...", service.Name, err)
glog.V(1).Infof("Couldn't get status for %s: %s, skipping...", service.Name, err)
continue
}
debug(c.Name(), "%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration)
glog.V(1).Infof("%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration)
labels := map[string]string{
"service": service.Name,
}

View file

@ -9,22 +9,25 @@ import (
"os"
"os/signal"
"runtime/pprof"
"strings"
"sync"
"syscall"
"time"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/exp"
"github.com/prometheus/node_exporter/collector"
)
// Command-line flags and the exporter's own metrics.
// NOTE(review): this span comes from a diff with the +/- markers stripped, so
// the declarations appear twice; the second group (which adds
// enabledCollectors) appears to be the added version — confirm.
var (
configFile = flag.String("config", "node_exporter.conf", "config file.")
memProfile = flag.String("memprofile", "", "write memory profile to this file")
listeningAddress = flag.String("listen", ":8080", "address to listen on")
interval = flag.Duration("interval", 60*time.Second, "refresh interval")
scrapeDurations = prometheus.NewDefaultHistogram()
metricsUpdated = prometheus.NewGauge()
configFile = flag.String("config", "node_exporter.conf", "config file.")
memProfile = flag.String("memprofile", "", "write memory profile to this file")
listeningAddress = flag.String("listen", ":8080", "address to listen on")
// enabledCollectors is split on "," and each name is looked up in
// collector.Factories by loadCollectors.
enabledCollectors = flag.String("enabledCollectors", "attributes,diskstats,loadavg,meminfo,netdev", "comma seperated list of collectors to use")
interval = flag.Duration("interval", 60*time.Second, "refresh interval")
scrapeDurations = prometheus.NewDefaultHistogram()
metricsUpdated = prometheus.NewGauge()
)
func main() {
@ -38,9 +41,9 @@ func main() {
registry.Register("node_exporter_scrape_duration_seconds", "node_exporter: Duration of a scrape job.", prometheus.NilLabels, scrapeDurations)
registry.Register("node_exporter_metrics_updated", "node_exporter: Number of metrics updated.", prometheus.NilLabels, metricsUpdated)
log.Printf("Registered collectors:")
for _, c := range collectors {
log.Print(" - ", c.Name())
glog.Infof("Enabled collectors:")
for n, _ := range collectors {
glog.Infof(" - %s", n)
}
sigHup := make(chan os.Signal)
@ -50,7 +53,7 @@ func main() {
go serveStatus(registry)
log.Printf("Starting initial collection")
glog.Infof("Starting initial collection")
collect(collectors)
tick := time.Tick(*interval)
@ -61,17 +64,17 @@ func main() {
if err != nil {
log.Fatalf("Couldn't load config and collectors: %s", err)
}
log.Printf("Reloaded collectors and config")
glog.Infof("Reloaded collectors and config")
tick = time.Tick(*interval)
case <-tick:
log.Printf("Starting new interval")
glog.Infof("Starting new interval")
collect(collectors)
case <-sigUsr1:
log.Printf("got signal")
glog.Infof("got signal")
if *memProfile != "" {
log.Printf("Writing memory profile to %s", *memProfile)
glog.Infof("Writing memory profile to %s", *memProfile)
f, err := os.Create(*memProfile)
if err != nil {
log.Fatal(err)
@ -84,25 +87,29 @@ func main() {
}
// loadCollectors reads the config via getConfig(file) and builds the
// collectors by calling factories from collector.Factories with that config
// and the given registry.
//
// The process exits through log.Fatalf when the config cannot be read or when
// a requested collector name has no factory; an error returned by a factory is
// passed back to the caller.
//
// NOTE(review): this span comes from a diff with the +/- markers stripped; the
// slice-based lines appear to be the removed version and the map-based lines
// (keyed by collector name) the added one — confirm.
func loadCollectors(file string, registry prometheus.Registry) ([]collector.Collector, error) {
collectors := []collector.Collector{}
func loadCollectors(file string, registry prometheus.Registry) (map[string]collector.Collector, error) {
collectors := map[string]collector.Collector{}
config, err := getConfig(file)
if err != nil {
log.Fatalf("Couldn't read config %s: %s", file, err)
}
for _, fn := range collector.Factories {
// Only the collectors named in the enabledCollectors flag are instantiated.
for _, name := range strings.Split(*enabledCollectors, ",") {
fn, ok := collector.Factories[name]
if !ok {
log.Fatalf("Collector '%s' not available", name)
}
c, err := fn(*config, registry)
if err != nil {
return nil, err
}
collectors = append(collectors, c)
collectors[name] = c
}
return collectors, nil
}
func getConfig(file string) (*collector.Config, error) {
config := &collector.Config{}
log.Printf("Reading config %s", *configFile)
glog.Infof("Reading config %s", *configFile)
bytes, err := ioutil.ReadFile(*configFile)
if err != nil {
return nil, err
@ -115,31 +122,31 @@ func serveStatus(registry prometheus.Registry) {
http.ListenAndServe(*listeningAddress, exp.DefaultCoarseMux)
}
// collect starts one goroutine per collector that calls Execute on it, and
// blocks until every goroutine has called wg.Done.
//
// NOTE(review): this span comes from a diff with the +/- markers stripped; the
// slice-based lines appear to be the removed version and the lines that also
// pass the collector name n the added one — confirm.
func collect(collectors []collector.Collector) {
func collect(collectors map[string]collector.Collector) {
wg := sync.WaitGroup{}
wg.Add(len(collectors))
for _, c := range collectors {
go func(c collector.Collector) {
Execute(c)
for n, c := range collectors {
// The loop variables are passed as arguments so each goroutine works on its
// own copy.
go func(n string, c collector.Collector) {
Execute(n, c)
wg.Done()
}(c)
}(n, c)
}
wg.Wait()
}
func Execute(c collector.Collector) {
func Execute(name string, c collector.Collector) {
begin := time.Now()
updates, err := c.Update()
duration := time.Since(begin)
label := map[string]string{
"collector": c.Name(),
"collector": name,
}
if err != nil {
log.Printf("ERROR: %s failed after %fs: %s", c.Name(), duration.Seconds(), err)
glog.Infof("ERROR: %s failed after %fs: %s", name, duration.Seconds(), err)
label["result"] = "error"
} else {
log.Printf("OK: %s success after %fs.", c.Name(), duration.Seconds())
glog.Infof("OK: %s success after %fs.", name, duration.Seconds())
label["result"] = "success"
}
scrapeDurations.Add(label, duration.Seconds())