From a02e469b0727aed276ecd8e8d22de7f2b2447837 Mon Sep 17 00:00:00 2001
From: Brian Brazil
Date: Thu, 16 Mar 2017 17:21:00 +0000
Subject: [PATCH] Report collector success/failure and duration per scrape.
 (#516)

This is in line with best practices, and also saves us 63 timeseries on
a default Linux setup.
---
 collector/fixtures/e2e-output.txt | 31 ++++++++++++++++++++++++++++--
 end-to-end-test.sh                |  2 +-
 node_exporter.go                  | 32 ++++++++++++++++++--------------
 3 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt
index 03cb2078..feac6c7c 100644
--- a/collector/fixtures/e2e-output.txt
+++ b/collector/fixtures/e2e-output.txt
@@ -457,8 +457,6 @@ node_edac_uncorrectable_errors_total{controller="0"} 5
 node_entropy_available_bits 1337
 # HELP node_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, and goversion from which node_exporter was built.
 # TYPE node_exporter_build_info gauge
-# HELP node_exporter_scrape_duration_seconds node_exporter: Duration of a scrape job.
-# TYPE node_exporter_scrape_duration_seconds summary
 # HELP node_filefd_allocated File descriptor statistics: allocated.
 # TYPE node_filefd_allocated gauge
 node_filefd_allocated 1024
@@ -2072,6 +2070,35 @@ node_procs_blocked 0
 # HELP node_procs_running Number of processes in runnable state.
 # TYPE node_procs_running gauge
 node_procs_running 2
+# HELP node_scrape_collector_duration_seconds node_exporter: Duration of a collector scrape.
+# TYPE node_scrape_collector_duration_seconds gauge
+# HELP node_scrape_collector_success node_exporter: Whether a collector succeeded.
+# TYPE node_scrape_collector_success gauge
+node_scrape_collector_success{collector="bonding"} 1
+node_scrape_collector_success{collector="buddyinfo"} 1
+node_scrape_collector_success{collector="conntrack"} 1
+node_scrape_collector_success{collector="diskstats"} 1
+node_scrape_collector_success{collector="drbd"} 1
+node_scrape_collector_success{collector="edac"} 1
+node_scrape_collector_success{collector="entropy"} 1
+node_scrape_collector_success{collector="filefd"} 1
+node_scrape_collector_success{collector="hwmon"} 1
+node_scrape_collector_success{collector="infiniband"} 1
+node_scrape_collector_success{collector="ksmd"} 1
+node_scrape_collector_success{collector="loadavg"} 1
+node_scrape_collector_success{collector="mdadm"} 1
+node_scrape_collector_success{collector="megacli"} 1
+node_scrape_collector_success{collector="meminfo"} 1
+node_scrape_collector_success{collector="meminfo_numa"} 1
+node_scrape_collector_success{collector="mountstats"} 1
+node_scrape_collector_success{collector="netdev"} 1
+node_scrape_collector_success{collector="netstat"} 1
+node_scrape_collector_success{collector="nfs"} 1
+node_scrape_collector_success{collector="sockstat"} 1
+node_scrape_collector_success{collector="stat"} 1
+node_scrape_collector_success{collector="textfile"} 1
+node_scrape_collector_success{collector="wifi"} 1
+node_scrape_collector_success{collector="zfs"} 1
 # HELP node_sockstat_FRAG_inuse Number of FRAG sockets in state inuse.
 # TYPE node_sockstat_FRAG_inuse gauge
 node_sockstat_FRAG_inuse 0
diff --git a/end-to-end-test.sh b/end-to-end-test.sh
index 26a191ba..e0745563 100755
--- a/end-to-end-test.sh
+++ b/end-to-end-test.sh
@@ -35,7 +35,7 @@ cd "$(dirname $0)"
 port="$((10000 + (RANDOM % 10000)))"
 tmpdir=$(mktemp -d /tmp/node_exporter_e2e_test.XXXXXX)
 
-skip_re="^(go_|node_exporter_|process_|node_textfile_mtime)"
+skip_re="^(go_|node_exporter_build_info|node_scrape_collector_duration_seconds|process_|node_textfile_mtime)"
 
 keep=0; update=0; verbose=0
 while getopts 'hkuv' opt
diff --git a/node_exporter.go b/node_exporter.go
index 1028ed8c..4a7f88a2 100644
--- a/node_exporter.go
+++ b/node_exporter.go
@@ -36,14 +36,17 @@ const (
 )
 
 var (
-	scrapeDurations = prometheus.NewSummaryVec(
-		prometheus.SummaryOpts{
-			Namespace: collector.Namespace,
-			Subsystem: "exporter",
-			Name:      "scrape_duration_seconds",
-			Help:      "node_exporter: Duration of a scrape job.",
-		},
-		[]string{"collector", "result"},
+	scrapeDurationDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(collector.Namespace, "scrape", "collector_duration_seconds"),
+		"node_exporter: Duration of a collector scrape.",
+		[]string{"collector"},
+		nil,
+	)
+	scrapeSuccessDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(collector.Namespace, "scrape", "collector_success"),
+		"node_exporter: Whether a collector succeeded.",
+		[]string{"collector"},
+		nil,
 	)
 )
 
@@ -54,7 +57,8 @@ type NodeCollector struct {
 
 // Describe implements the prometheus.Collector interface.
 func (n NodeCollector) Describe(ch chan<- *prometheus.Desc) {
-	scrapeDurations.Describe(ch)
+	ch <- scrapeDurationDesc
+	ch <- scrapeSuccessDesc
 }
 
 // Collect implements the prometheus.Collector interface.
@@ -68,7 +72,6 @@ func (n NodeCollector) Collect(ch chan<- prometheus.Metric) {
 		}(name, c)
 	}
 	wg.Wait()
-	scrapeDurations.Collect(ch)
 }
 
 func filterAvailableCollectors(collectors string) string {
@@ -86,16 +89,17 @@ func execute(name string, c collector.Collector, ch chan<- prometheus.Metric) {
 	begin := time.Now()
 	err := c.Update(ch)
 	duration := time.Since(begin)
-	var result string
+	var success float64
 
 	if err != nil {
 		log.Errorf("ERROR: %s collector failed after %fs: %s", name, duration.Seconds(), err)
-		result = "error"
+		success = 0
 	} else {
 		log.Debugf("OK: %s collector succeeded after %fs.", name, duration.Seconds())
-		result = "success"
+		success = 1
 	}
-	scrapeDurations.WithLabelValues(name, result).Observe(duration.Seconds())
+	ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name)
+	ch <- prometheus.MustNewConstMetric(scrapeSuccessDesc, prometheus.GaugeValue, success, name)
 }
 
 func loadCollectors(list string) (map[string]collector.Collector, error) {
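The core of the change is visible in the last hunk: instead of observing into a long-lived `SummaryVec`, `execute` now emits one freshly built constant metric per collector on every scrape. The following is a minimal, self-contained sketch of that const-metric pattern, not part of the patch; the `demo` namespace, `demoCollector` type, and `doWork` stub are invented for illustration:

```go
// A minimal sketch of the per-scrape const-metric pattern this patch adopts.
// Everything here ("demo", demoCollector, doWork) is illustrative only and
// is not part of node_exporter.
package main

import (
	"log"
	"net/http"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

var (
	// Descriptors are created once; no metric values live between scrapes.
	durationDesc = prometheus.NewDesc(
		prometheus.BuildFQName("demo", "scrape", "collector_duration_seconds"),
		"Duration of a collector scrape.",
		[]string{"collector"}, nil,
	)
	successDesc = prometheus.NewDesc(
		prometheus.BuildFQName("demo", "scrape", "collector_success"),
		"Whether a collector succeeded.",
		[]string{"collector"}, nil,
	)
)

type demoCollector struct{}

// Describe sends only the static descriptors, mirroring the patched
// NodeCollector.Describe.
func (demoCollector) Describe(ch chan<- *prometheus.Desc) {
	ch <- durationDesc
	ch <- successDesc
}

// Collect times the (stubbed) collector work and reports its outcome as
// constant gauges, exactly one sample per label set per scrape.
func (demoCollector) Collect(ch chan<- prometheus.Metric) {
	begin := time.Now()
	err := doWork() // stand-in for collector.Collector.Update
	success := 1.0
	if err != nil {
		success = 0
	}
	ch <- prometheus.MustNewConstMetric(
		durationDesc, prometheus.GaugeValue, time.Since(begin).Seconds(), "demo")
	ch <- prometheus.MustNewConstMetric(
		successDesc, prometheus.GaugeValue, success, "demo")
}

// doWork is a hypothetical stand-in for real collection logic.
func doWork() error {
	time.Sleep(10 * time.Millisecond)
	return nil
}

func main() {
	prometheus.MustRegister(demoCollector{})
	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":8080", nil))
}
```

The design point is that `MustNewConstMetric` builds a throwaway sample on each `Collect` call, so no per-label-combination state accumulates in the process, which is where the saved timeseries come from. It also makes a query such as `node_scrape_collector_success == 0` a straightforward way to catch a failing collector, since each scrape exports exactly one gauge sample per collector.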