Merge pull request #429 from brian-brazil/scrape-time

Have scrape time as a pseudo-variable, not a Prometheus variable.
juliusv 2015-01-02 13:22:04 +01:00
commit 917acb6baf
7 changed files with 71 additions and 58 deletions
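
In effect, the scrape latency that was previously visible only as a summary in Prometheus's own instrumentation (prometheus_target_operation_latency_milliseconds) is now ingested per target as a synthetic scrape_duration_seconds sample next to the existing up health sample, so every target gets a regular per-scrape time series. A minimal standalone sketch of the two resulting samples; the job, instance, and duration values are invented for illustration:

// Sketch of the two synthetic samples a scrape now produces. The label
// values and the 154ms duration are hypothetical.
package main

import (
	"fmt"
	"time"
)

func main() {
	const job, instance = "example_job", "http://example.org/metrics"
	scrapeDuration := 154 * time.Millisecond
	healthy := true

	up := 0.0
	if healthy {
		up = 1.0
	}
	// The same conversion recordScrapeHealth uses: duration in plain seconds.
	seconds := float64(scrapeDuration) / float64(time.Second)

	fmt.Printf("up{job=%q,instance=%q} %g\n", job, instance, up)
	fmt.Printf("scrape_duration_seconds{job=%q,instance=%q} %g\n", job, instance, seconds)
}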


@@ -36,30 +36,19 @@ const (
 	InstanceLabel clientmodel.LabelName = "instance"
 	// ScrapeHealthMetricName is the metric name for the synthetic health
 	// variable.
-	ScrapeHealthMetricName clientmodel.LabelValue = "up"
+	scrapeHealthMetricName clientmodel.LabelValue = "up"
+	// scrapeDurationMetricName is the metric name for the synthetic scrape duration
+	// variable.
+	scrapeDurationMetricName clientmodel.LabelValue = "scrape_duration_seconds"

 	// Constants for instrumentation.
 	namespace = "prometheus"
-	job      = "target_job"
-	instance = "target_instance"
-	failure  = "failure"
-	outcome  = "outcome"
-	success  = "success"
 	interval = "interval"
 )

 var (
 	localhostRepresentations = []string{"http://127.0.0.1", "http://localhost"}

-	targetOperationLatencies = prometheus.NewSummaryVec(
-		prometheus.SummaryOpts{
-			Namespace:  namespace,
-			Name:       "target_operation_latency_milliseconds",
-			Help:       "The latencies for target operations.",
-			Objectives: []float64{0.01, 0.05, 0.5, 0.90, 0.99},
-		},
-		[]string{job, instance, outcome},
-	)
-
 	targetIntervalLength = prometheus.NewSummaryVec(
 		prometheus.SummaryOpts{
 			Namespace: namespace,
@@ -72,7 +61,6 @@ var (
 )

 func init() {
-	prometheus.MustRegister(targetOperationLatencies)
 	prometheus.MustRegister(targetIntervalLength)
 }
@@ -189,28 +177,37 @@ func NewTarget(url string, deadline time.Duration, baseLabels clientmodel.LabelS
 	return target
 }

-func (t *target) recordScrapeHealth(ingester extraction.Ingester, timestamp clientmodel.Timestamp, healthy bool) {
-	metric := clientmodel.Metric{}
+func (t *target) recordScrapeHealth(ingester extraction.Ingester, timestamp clientmodel.Timestamp, healthy bool, scrapeDuration time.Duration) {
+	healthMetric := clientmodel.Metric{}
+	durationMetric := clientmodel.Metric{}
 	for label, value := range t.baseLabels {
-		metric[label] = value
+		healthMetric[label] = value
+		durationMetric[label] = value
 	}
-	metric[clientmodel.MetricNameLabel] = clientmodel.LabelValue(ScrapeHealthMetricName)
-	metric[InstanceLabel] = clientmodel.LabelValue(t.URL())
+	healthMetric[clientmodel.MetricNameLabel] = clientmodel.LabelValue(scrapeHealthMetricName)
+	durationMetric[clientmodel.MetricNameLabel] = clientmodel.LabelValue(scrapeDurationMetricName)
+	healthMetric[InstanceLabel] = clientmodel.LabelValue(t.URL())
+	durationMetric[InstanceLabel] = clientmodel.LabelValue(t.URL())

 	healthValue := clientmodel.SampleValue(0)
 	if healthy {
 		healthValue = clientmodel.SampleValue(1)
 	}

-	sample := &clientmodel.Sample{
-		Metric:    metric,
+	healthSample := &clientmodel.Sample{
+		Metric:    healthMetric,
 		Timestamp: timestamp,
 		Value:     healthValue,
 	}
+	durationSample := &clientmodel.Sample{
+		Metric:    durationMetric,
+		Timestamp: timestamp,
+		Value:     clientmodel.SampleValue(float64(scrapeDuration) / float64(time.Second)),
+	}

 	ingester.Ingest(&extraction.Result{
 		Err:     nil,
-		Samples: clientmodel.Samples{sample},
+		Samples: clientmodel.Samples{healthSample, durationSample},
 	})
 }
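
The duration sample stores plain seconds as a float; float64(scrapeDuration) / float64(time.Second) yields the same value as time.Duration's Seconds() method, since a Duration is an integer count of nanoseconds. A quick standalone check of the conversion, using the same 2-second duration as the test further down:

// Both expressions print 2: nanoseconds divided by nanoseconds-per-second.
package main

import (
	"fmt"
	"time"
)

func main() {
	d := 2 * time.Second
	fmt.Println(float64(d) / float64(time.Second)) // 2
	fmt.Println(d.Seconds())                       // 2
}
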
@@ -292,23 +289,15 @@ const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client
 func (t *target) scrape(ingester extraction.Ingester) (err error) {
 	timestamp := clientmodel.Now()
 	defer func(start time.Time) {
-		ms := float64(time.Since(start)) / float64(time.Millisecond)
-		labels := prometheus.Labels{
-			job:      string(t.baseLabels[clientmodel.JobLabel]),
-			instance: t.URL(),
-			outcome:  success,
-		}
 		t.Lock() // Writing t.state and t.lastError requires the lock.
 		if err == nil {
 			t.state = Alive
-			labels[outcome] = failure
 		} else {
 			t.state = Unreachable
 		}
 		t.lastError = err
 		t.Unlock()
-		targetOperationLatencies.With(labels).Observe(ms)
-		t.recordScrapeHealth(ingester, timestamp, err == nil)
+		t.recordScrapeHealth(ingester, timestamp, err == nil, time.Since(start))
 	}(time.Now())

 	req, err := http.NewRequest("GET", t.URL(), nil)
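
Note how the deferred closure takes start as an argument: time.Now() is evaluated when the defer statement runs, while the named return value err is read only when the closure executes on exit, so a single time.Since(start) covers the whole scrape including the failure path. A standalone sketch of the pattern; the function body here is a stand-in, not code from this repository:

package main

import (
	"errors"
	"fmt"
	"time"
)

func scrape() (err error) {
	defer func(start time.Time) {
		// err is the named return value, so it holds the final outcome here.
		fmt.Printf("healthy=%v scrape_duration=%v\n", err == nil, time.Since(start))
	}(time.Now()) // start is fixed at defer time, before the work begins

	time.Sleep(10 * time.Millisecond) // stand-in for the HTTP request
	return errors.New("connection refused")
}

func main() {
	scrape()
}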


@@ -33,17 +33,22 @@ import (
 const resolvConf = "/etc/resolv.conf"

 var (
-	dnsSDLookupsCount = prometheus.NewCounterVec(
+	dnsSDLookupsCount = prometheus.NewCounter(
 		prometheus.CounterOpts{
 			Namespace: namespace,
 			Name:      "dns_sd_lookups_total",
-			Help:      "The number of DNS-SD lookup successes/failures per pool.",
-		},
-		[]string{outcome},
-	)
+			Help:      "The number of DNS-SD lookups.",
+		})
+	dnsSDLookupFailuresCount = prometheus.NewCounter(
+		prometheus.CounterOpts{
+			Namespace: namespace,
+			Name:      "dns_sd_lookup_failures_total",
+			Help:      "The number of DNS-SD lookup failures.",
+		})
 )

 func init() {
+	prometheus.MustRegister(dnsSDLookupFailuresCount)
 	prometheus.MustRegister(dnsSDLookupsCount)
 }
@@ -77,11 +82,10 @@ func NewSdTargetProvider(job config.JobConfig) *sdTargetProvider {
 func (p *sdTargetProvider) Targets() ([]Target, error) {
 	var err error
 	defer func() {
-		message := success
+		dnsSDLookupsCount.Inc()
 		if err != nil {
-			message = failure
+			dnsSDLookupFailuresCount.Inc()
 		}
-		dnsSDLookupsCount.WithLabelValues(message).Inc()
 	}()

 	if time.Since(p.lastRefresh) < p.refreshInterval {
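
Replacing the outcome-labeled CounterVec with a pair of plain counters follows the usual convention of counting failures as a subset of a total, so a failure ratio can be derived at query time by dividing the two series. A minimal sketch of the pattern; the metric names and the lookup body are hypothetical, not the ones registered above:

package main

import (
	"errors"

	"github.com/prometheus/client_golang/prometheus"
)

var (
	lookupsTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "example_lookups_total",
		Help: "The total number of example lookups.",
	})
	lookupFailuresTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "example_lookup_failures_total",
		Help: "The number of failed example lookups.",
	})
)

func init() {
	prometheus.MustRegister(lookupsTotal)
	prometheus.MustRegister(lookupFailuresTotal)
}

func lookup() (err error) {
	defer func() {
		lookupsTotal.Inc() // every attempt is counted
		if err != nil {
			lookupFailuresTotal.Inc() // failures are a subset of the total
		}
	}()
	return errors.New("lookup failed") // stand-in for real resolution work
}

func main() {
	lookup()
}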


@@ -57,18 +57,18 @@ func TestTargetRecordScrapeHealth(t *testing.T) {
 	now := clientmodel.Now()
 	ingester := &collectResultIngester{}
-	testTarget.recordScrapeHealth(ingester, now, true)
+	testTarget.recordScrapeHealth(ingester, now, true, 2*time.Second)

 	result := ingester.result

-	if len(result.Samples) != 1 {
-		t.Fatalf("Expected one sample, got %d", len(result.Samples))
+	if len(result.Samples) != 2 {
+		t.Fatalf("Expected two samples, got %d", len(result.Samples))
 	}

 	actual := result.Samples[0]
 	expected := &clientmodel.Sample{
 		Metric: clientmodel.Metric{
-			clientmodel.MetricNameLabel: ScrapeHealthMetricName,
+			clientmodel.MetricNameLabel: scrapeHealthMetricName,
 			InstanceLabel:               "http://example.url",
 			clientmodel.JobLabel:        "testjob",
 		},
@@ -83,6 +83,25 @@ func TestTargetRecordScrapeHealth(t *testing.T) {
 	if !actual.Equal(expected) {
 		t.Fatalf("Expected and actual samples not equal. Expected: %v, actual: %v", expected, actual)
 	}
+
+	actual = result.Samples[1]
+	expected = &clientmodel.Sample{
+		Metric: clientmodel.Metric{
+			clientmodel.MetricNameLabel: scrapeDurationMetricName,
+			InstanceLabel:               "http://example.url",
+			clientmodel.JobLabel:        "testjob",
+		},
+		Timestamp: now,
+		Value:     2.0,
+	}
+
+	if result.Err != nil {
+		t.Fatalf("Got unexpected error: %v", result.Err)
+	}
+
+	if !actual.Equal(expected) {
+		t.Fatalf("Expected and actual samples not equal. Expected: %v, actual: %v", expected, actual)
+	}
 }

 func TestTargetScrapeTimeout(t *testing.T) {


@@ -15,6 +15,7 @@ package retrieval
 import (
 	"sync"
+
 	"github.com/golang/glog"
 	"github.com/prometheus/client_golang/extraction"


@@ -869,7 +869,7 @@ func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap
 	iter := fingerprintToSeries.iter()
 	defer func() {
 		// Consume the iterator in any case to not leak goroutines.
-		for _ = range iter {
+		for range iter {
 		}
 	}()


@@ -567,7 +567,7 @@ func (s *memorySeriesStorage) cycleThroughMemoryFingerprints() chan clientmodel.
 	defer func() {
 		if fpIter != nil {
-			for _ = range fpIter {
+			for range fpIter {
 				// Consume the iterator.
 			}
 		}
@@ -661,9 +661,9 @@ loop:
 		}
 	}
 	// Wait until both channels are closed.
-	for _ = range memoryFingerprints {
+	for range memoryFingerprints {
 	}
-	for _ = range archivedFingerprints {
+	for range archivedFingerprints {
 	}
 }
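
The for _ = range c spelling predates Go 1.4; since Go 1.4 the blank assignment can be dropped entirely. Either form simply receives and discards values until the channel is closed, as in this small standalone example:

package main

import "fmt"

func main() {
	ch := make(chan int, 3)
	for i := 0; i < 3; i++ {
		ch <- i
	}
	close(ch) // without close, draining would block forever

	for range ch {
		// Values are received and discarded.
	}
	fmt.Println("channel drained")
}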