diff --git a/retrieval/instrumentation.go b/retrieval/instrumentation.go
index 4d94fa9de..978dfeeb4 100644
--- a/retrieval/instrumentation.go
+++ b/retrieval/instrumentation.go
@@ -42,10 +42,22 @@ var (
 		ReportablePercentiles: []float64{0.01, 0.05, 0.5, 0.90, 0.99}})
 
 	targetOperations = prometheus.NewCounter()
+	dnsSDLookupsCount = prometheus.NewCounter()
 )
 
+// recordOutcome increments dnsSDLookupsCount once, labelling the sample with
+// failure when err is non-nil and with success otherwise.
+func recordOutcome(err error) {
+	message := success
+	if err != nil {
+		message = failure
+	}
+	dnsSDLookupsCount.Increment(map[string]string{outcome: message})
+}
+
 func init() {
 	prometheus.Register("prometheus_target_operations_total", "The total numbers of operations of the various targets that are being monitored.", prometheus.NilLabels, targetOperations)
 	prometheus.Register("prometheus_target_operation_latency_ms", "The latencies for various target operations.", prometheus.NilLabels, targetOperationLatencies)
 	prometheus.Register("prometheus_targetpool_duration_ms", "The durations for each TargetPool to retrieve state from all included entities.", prometheus.NilLabels, retrievalDurations)
+	prometheus.Register("prometheus_dns_sd_lookups_total", "The number of DNS-SD lookup successes/failures per pool.", prometheus.NilLabels, dnsSDLookupsCount)
 }
diff --git a/retrieval/target_provider.go b/retrieval/target_provider.go
index 491cf4f81..4c1ae823d 100644
--- a/retrieval/target_provider.go
+++ b/retrieval/target_provider.go
@@ -59,11 +59,17 @@ func NewSdTargetProvider(job config.JobConfig) *sdTargetProvider {
 }
 
 func (p *sdTargetProvider) Targets() ([]Target, error) {
 	if time.Since(p.lastRefresh) < p.refreshInterval {
 		return p.targets, nil
 	}
 
+	// Register the outcome recorder only after the cache check above, so that
+	// returning cached targets is not counted as a successful lookup. The :=
+	// below reuses this err (same scope), so the closure sees the lookup error.
+	var err error
+	defer func() { recordOutcome(err) }()
+
 	response, err := lookupSRV(p.job.GetSdName())
 	if err != nil {
 		return nil, err
 	}