From e01b6cdb448971c5bd03899abff15ee0e9755663 Mon Sep 17 00:00:00 2001
From: "Matt T. Proud" <matt.proud@gmail.com>
Date: Mon, 28 Jan 2013 16:36:28 +0100
Subject: [PATCH] Duration statistics for each target pool.

We have an open question of how long does it take for each target
pool to have the state retrieved from all participating elements.
This commit starts by providing insight into this.
---
 retrieval/instrumentation.go |  6 +++++-
 retrieval/targetpool.go      | 27 +++++++++++++++++++++++----
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/retrieval/instrumentation.go b/retrieval/instrumentation.go
index ecbf231a3e..2142376224 100644
--- a/retrieval/instrumentation.go
+++ b/retrieval/instrumentation.go
@@ -38,7 +38,10 @@ var (
 
 	targetOperationLatencies = metrics.NewHistogram(networkLatencyHistogram)
 
-	// TODO: Include durations partitioned by target pool intervals.
+	retrievalDurations = metrics.NewHistogram(&metrics.HistogramSpecification{
+		Starts:                metrics.LogarithmicSizedBucketsFor(0, 10000),
+		BucketBuilder:         metrics.AccumulatingBucketBuilder(metrics.EvictAndReplaceWith(10, maths.Average), 100),
+		ReportablePercentiles: []float64{0.01, 0.05, 0.5, 0.90, 0.99}})
 
 	targetOperations = metrics.NewCounter()
 )
@@ -46,4 +49,5 @@ var (
 func init() {
 	registry.Register("prometheus_target_operations_total", "The total numbers of operations of the various targets that are being monitored.", registry.NilLabels, targetOperations)
 	registry.Register("prometheus_target_operation_latency_ms", "The latencies for various target operations.", registry.NilLabels, targetOperationLatencies)
+	registry.Register("prometheus_targetpool_duration_ms", "The durations for each TargetPool to retrieve state from all included entities.", registry.NilLabels, retrievalDurations)
 }
diff --git a/retrieval/targetpool.go b/retrieval/targetpool.go
index 1737709dd3..ccc3647804 100644
--- a/retrieval/targetpool.go
+++ b/retrieval/targetpool.go
@@ -7,10 +7,14 @@ import (
 	"time"
 )
 
+const (
+	intervalKey = "interval"
+)
+
 type TargetPool struct {
 	done    chan bool
-	targets []Target
 	manager TargetManager
+	targets []Target
 }
 
 func NewTargetPool(m TargetManager) (p *TargetPool) {
@@ -51,7 +55,7 @@ func (p *TargetPool) Run(results chan format.Result, interval time.Duration) {
 	for {
 		select {
 		case <-ticker:
-			p.runIteration(results)
+			p.runIteration(results, interval)
 		case <-p.done:
 			log.Printf("TargetPool exiting...")
 			break
@@ -70,8 +74,13 @@ func (p *TargetPool) runSingle(earliest time.Time, results chan format.Result, t
 	t.Scrape(earliest, results)
 }
 
-func (p *TargetPool) runIteration(results chan format.Result) {
-	for i := 0; i < p.Len(); i++ {
+func (p *TargetPool) runIteration(results chan format.Result, interval time.Duration) {
+	begin := time.Now()
+
+	targetCount := p.Len()
+	finished := make(chan bool, targetCount)
+
+	for i := 0; i < targetCount; i++ {
 		target := heap.Pop(p).(Target)
 		if target == nil {
 			break
@@ -88,6 +97,16 @@ func (p *TargetPool) runIteration(results chan format.Result) {
 		go func() {
 			p.runSingle(now, results, target)
 			heap.Push(p, target)
+			finished <- true
 		}()
 	}
+
+	for i := 0; i < targetCount; i++ {
+		<-finished
+	}
+
+	close(finished)
+
+	duration := float64(time.Now().Sub(begin) / time.Millisecond)
+	retrievalDurations.Add(map[string]string{intervalKey: interval.String()}, duration)
 }