Report collector success/failure and duration per scrape. (#516)

This is in line with Prometheus best practices for exporters, and also saves us
63 time series on a default Linux setup.
This commit is contained in:
Brian Brazil 2017-03-16 17:21:00 +00:00 committed by GitHub
parent 7426dc9460
commit a02e469b07
3 changed files with 48 additions and 17 deletions

View file

@ -457,8 +457,6 @@ node_edac_uncorrectable_errors_total{controller="0"} 5
node_entropy_available_bits 1337 node_entropy_available_bits 1337
# HELP node_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, and goversion from which node_exporter was built. # HELP node_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, and goversion from which node_exporter was built.
# TYPE node_exporter_build_info gauge # TYPE node_exporter_build_info gauge
# HELP node_exporter_scrape_duration_seconds node_exporter: Duration of a scrape job.
# TYPE node_exporter_scrape_duration_seconds summary
# HELP node_filefd_allocated File descriptor statistics: allocated. # HELP node_filefd_allocated File descriptor statistics: allocated.
# TYPE node_filefd_allocated gauge # TYPE node_filefd_allocated gauge
node_filefd_allocated 1024 node_filefd_allocated 1024
@ -2072,6 +2070,35 @@ node_procs_blocked 0
# HELP node_procs_running Number of processes in runnable state. # HELP node_procs_running Number of processes in runnable state.
# TYPE node_procs_running gauge # TYPE node_procs_running gauge
node_procs_running 2 node_procs_running 2
# HELP node_scrape_collector_duration_seconds node_exporter: Duration of a collector scrape.
# TYPE node_scrape_collector_duration_seconds gauge
# HELP node_scrape_collector_success node_exporter: Whether a collector succeeded.
# TYPE node_scrape_collector_success gauge
node_scrape_collector_success{collector="bonding"} 1
node_scrape_collector_success{collector="buddyinfo"} 1
node_scrape_collector_success{collector="conntrack"} 1
node_scrape_collector_success{collector="diskstats"} 1
node_scrape_collector_success{collector="drbd"} 1
node_scrape_collector_success{collector="edac"} 1
node_scrape_collector_success{collector="entropy"} 1
node_scrape_collector_success{collector="filefd"} 1
node_scrape_collector_success{collector="hwmon"} 1
node_scrape_collector_success{collector="infiniband"} 1
node_scrape_collector_success{collector="ksmd"} 1
node_scrape_collector_success{collector="loadavg"} 1
node_scrape_collector_success{collector="mdadm"} 1
node_scrape_collector_success{collector="megacli"} 1
node_scrape_collector_success{collector="meminfo"} 1
node_scrape_collector_success{collector="meminfo_numa"} 1
node_scrape_collector_success{collector="mountstats"} 1
node_scrape_collector_success{collector="netdev"} 1
node_scrape_collector_success{collector="netstat"} 1
node_scrape_collector_success{collector="nfs"} 1
node_scrape_collector_success{collector="sockstat"} 1
node_scrape_collector_success{collector="stat"} 1
node_scrape_collector_success{collector="textfile"} 1
node_scrape_collector_success{collector="wifi"} 1
node_scrape_collector_success{collector="zfs"} 1
# HELP node_sockstat_FRAG_inuse Number of FRAG sockets in state inuse. # HELP node_sockstat_FRAG_inuse Number of FRAG sockets in state inuse.
# TYPE node_sockstat_FRAG_inuse gauge # TYPE node_sockstat_FRAG_inuse gauge
node_sockstat_FRAG_inuse 0 node_sockstat_FRAG_inuse 0

View file

@ -35,7 +35,7 @@ cd "$(dirname $0)"
port="$((10000 + (RANDOM % 10000)))" port="$((10000 + (RANDOM % 10000)))"
tmpdir=$(mktemp -d /tmp/node_exporter_e2e_test.XXXXXX) tmpdir=$(mktemp -d /tmp/node_exporter_e2e_test.XXXXXX)
skip_re="^(go_|node_exporter_|process_|node_textfile_mtime)" skip_re="^(go_|node_exporter_build_info|node_scrape_collector_duration_seconds|process_|node_textfile_mtime)"
keep=0; update=0; verbose=0 keep=0; update=0; verbose=0
while getopts 'hkuv' opt while getopts 'hkuv' opt

View file

@ -36,14 +36,17 @@ const (
) )
var ( var (
scrapeDurations = prometheus.NewSummaryVec( scrapeDurationDesc = prometheus.NewDesc(
prometheus.SummaryOpts{ prometheus.BuildFQName(collector.Namespace, "scrape", "collector_duration_seconds"),
Namespace: collector.Namespace, "node_exporter: Duration of a collector scrape.",
Subsystem: "exporter", []string{"collector"},
Name: "scrape_duration_seconds", nil,
Help: "node_exporter: Duration of a scrape job.", )
}, scrapeSuccessDesc = prometheus.NewDesc(
[]string{"collector", "result"}, prometheus.BuildFQName(collector.Namespace, "scrape", "collector_success"),
"node_exporter: Whether a collector succeeded.",
[]string{"collector"},
nil,
) )
) )
@ -54,7 +57,8 @@ type NodeCollector struct {
// Describe implements the prometheus.Collector interface. // Describe implements the prometheus.Collector interface.
func (n NodeCollector) Describe(ch chan<- *prometheus.Desc) { func (n NodeCollector) Describe(ch chan<- *prometheus.Desc) {
scrapeDurations.Describe(ch) ch <- scrapeDurationDesc
ch <- scrapeSuccessDesc
} }
// Collect implements the prometheus.Collector interface. // Collect implements the prometheus.Collector interface.
@ -68,7 +72,6 @@ func (n NodeCollector) Collect(ch chan<- prometheus.Metric) {
}(name, c) }(name, c)
} }
wg.Wait() wg.Wait()
scrapeDurations.Collect(ch)
} }
func filterAvailableCollectors(collectors string) string { func filterAvailableCollectors(collectors string) string {
@ -86,16 +89,17 @@ func execute(name string, c collector.Collector, ch chan<- prometheus.Metric) {
begin := time.Now() begin := time.Now()
err := c.Update(ch) err := c.Update(ch)
duration := time.Since(begin) duration := time.Since(begin)
var result string var success float64
if err != nil { if err != nil {
log.Errorf("ERROR: %s collector failed after %fs: %s", name, duration.Seconds(), err) log.Errorf("ERROR: %s collector failed after %fs: %s", name, duration.Seconds(), err)
result = "error" success = 0
} else { } else {
log.Debugf("OK: %s collector succeeded after %fs.", name, duration.Seconds()) log.Debugf("OK: %s collector succeeded after %fs.", name, duration.Seconds())
result = "success" success = 1
} }
scrapeDurations.WithLabelValues(name, result).Observe(duration.Seconds()) ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name)
ch <- prometheus.MustNewConstMetric(scrapeSuccessDesc, prometheus.GaugeValue, success, name)
} }
func loadCollectors(list string) (map[string]collector.Collector, error) { func loadCollectors(list string) (map[string]collector.Collector, error) {