From 9a4ca68a610592e33b6b0231aea5b7579d76b7c5 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Wed, 31 Dec 2014 13:16:08 +0100 Subject: [PATCH 1/3] Add metrics for rule evaluation failures. Fixes https://github.com/prometheus/prometheus/issues/417 --- rules/manager/manager.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/rules/manager/manager.go b/rules/manager/manager.go index 0c3324a65..d09a10388 100644 --- a/rules/manager/manager.go +++ b/rules/manager/manager.go @@ -49,6 +49,13 @@ var ( }, []string{ruleTypeLabel}, ) + evalFailures = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "rule_evaluation_failures_total", + Help: "The total number of rule evaluation failures.", + }, + ) iterationDuration = prometheus.NewSummary(prometheus.SummaryOpts{ Namespace: namespace, Name: "evaluator_duration_milliseconds", @@ -59,6 +66,7 @@ var ( func init() { prometheus.MustRegister(iterationDuration) + prometheus.MustRegister(evalFailures) prometheus.MustRegister(evalDuration) } @@ -229,6 +237,11 @@ func (m *ruleManager) runIteration(results chan<- *extraction.Result) { Timestamp: s.Timestamp, } } + + if err != nil { + evalFailures.Inc() + } + m.results <- &extraction.Result{ Samples: samples, Err: err, From d6b9e976553c2d589b0af3f7070fdaa3224ceffa Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Wed, 31 Dec 2014 14:01:19 +0100 Subject: [PATCH 2/3] Remove extraction.Result type, simplify code. 
--- main.go | 13 ++++++------- retrieval/helpers_test.go | 4 ++-- retrieval/ingester.go | 12 ++++++------ retrieval/target.go | 5 +---- retrieval/target_test.go | 26 ++++++++------------------ rules/manager/manager.go | 14 +++++--------- 6 files changed, 28 insertions(+), 46 deletions(-) diff --git a/main.go b/main.go index 701bce67c..d9bc32d1c 100644 --- a/main.go +++ b/main.go @@ -23,7 +23,6 @@ import ( "time" "github.com/golang/glog" - "github.com/prometheus/client_golang/extraction" clientmodel "github.com/prometheus/client_golang/model" registry "github.com/prometheus/client_golang/prometheus" @@ -81,7 +80,7 @@ var ( ) type prometheus struct { - unwrittenSamples chan *extraction.Result + unwrittenSamples chan clientmodel.Samples ruleManager manager.RuleManager targetManager retrieval.TargetManager @@ -102,7 +101,7 @@ func NewPrometheus() *prometheus { glog.Fatalf("Error loading configuration from %s: %v", *configFile, err) } - unwrittenSamples := make(chan *extraction.Result, *samplesQueueCapacity) + unwrittenSamples := make(chan clientmodel.Samples, *samplesQueueCapacity) ingester := &retrieval.MergeLabelsIngester{ Labels: conf.GlobalLabels(), @@ -214,11 +213,11 @@ func (p *prometheus) Serve() { } }() - for block := range p.unwrittenSamples { - if block.Err == nil && len(block.Samples) > 0 { - p.storage.AppendSamples(block.Samples) + for samples := range p.unwrittenSamples { + if len(samples) > 0 { + p.storage.AppendSamples(samples) if p.remoteTSDBQueue != nil { - p.remoteTSDBQueue.Queue(block.Samples) + p.remoteTSDBQueue.Queue(samples) } } } diff --git a/retrieval/helpers_test.go b/retrieval/helpers_test.go index 139bd19eb..3777fb302 100644 --- a/retrieval/helpers_test.go +++ b/retrieval/helpers_test.go @@ -14,11 +14,11 @@ package retrieval import ( - "github.com/prometheus/client_golang/extraction" + clientmodel "github.com/prometheus/client_golang/model" ) type nopIngester struct{} -func (i nopIngester) Ingest(*extraction.Result) error { +func (i 
nopIngester) Ingest(clientmodel.Samples) error { return nil } diff --git a/retrieval/ingester.go b/retrieval/ingester.go index c8f6f8d43..50f66beca 100644 --- a/retrieval/ingester.go +++ b/retrieval/ingester.go @@ -31,19 +31,19 @@ type MergeLabelsIngester struct { // Ingest ingests the provided extraction result by merging in i.Labels and then // handing it over to i.Ingester. -func (i *MergeLabelsIngester) Ingest(r *extraction.Result) error { - for _, s := range r.Samples { +func (i *MergeLabelsIngester) Ingest(samples clientmodel.Samples) error { + for _, s := range samples { s.Metric.MergeFromLabelSet(i.Labels, i.CollisionPrefix) } - return i.Ingester.Ingest(r) + return i.Ingester.Ingest(samples) } // ChannelIngester feeds results into a channel without modifying them. -type ChannelIngester chan<- *extraction.Result +type ChannelIngester chan<- clientmodel.Samples // Ingest ingests the provided extraction result by sending it to i. -func (i ChannelIngester) Ingest(r *extraction.Result) error { - i <- r +func (i ChannelIngester) Ingest(s clientmodel.Samples) error { + i <- s return nil } diff --git a/retrieval/target.go b/retrieval/target.go index 66b378318..2ae527f0f 100644 --- a/retrieval/target.go +++ b/retrieval/target.go @@ -205,10 +205,7 @@ func (t *target) recordScrapeHealth(ingester extraction.Ingester, timestamp clie Value: clientmodel.SampleValue(float64(scrapeDuration) / float64(time.Second)), } - ingester.Ingest(&extraction.Result{ - Err: nil, - Samples: clientmodel.Samples{healthSample, durationSample}, - }) + ingester.Ingest(clientmodel.Samples{healthSample, durationSample}) } // RunScraper implements Target. 
diff --git a/retrieval/target_test.go b/retrieval/target_test.go index 73492ce93..0e96d205e 100644 --- a/retrieval/target_test.go +++ b/retrieval/target_test.go @@ -22,17 +22,15 @@ import ( clientmodel "github.com/prometheus/client_golang/model" - "github.com/prometheus/client_golang/extraction" - "github.com/prometheus/prometheus/utility" ) type collectResultIngester struct { - result *extraction.Result + result clientmodel.Samples } -func (i *collectResultIngester) Ingest(r *extraction.Result) error { - i.result = r +func (i *collectResultIngester) Ingest(s clientmodel.Samples) error { + i.result = s return nil } @@ -57,15 +55,15 @@ func TestTargetRecordScrapeHealth(t *testing.T) { now := clientmodel.Now() ingester := &collectResultIngester{} - testTarget.recordScrapeHealth(ingester, now, true, 2 * time.Second) + testTarget.recordScrapeHealth(ingester, now, true, 2*time.Second) result := ingester.result - if len(result.Samples) != 2 { - t.Fatalf("Expected two samples, got %d", len(result.Samples)) + if len(result) != 2 { + t.Fatalf("Expected two samples, got %d", len(result)) } - actual := result.Samples[0] + actual := result[0] expected := &clientmodel.Sample{ Metric: clientmodel.Metric{ clientmodel.MetricNameLabel: scrapeHealthMetricName, @@ -76,15 +74,11 @@ func TestTargetRecordScrapeHealth(t *testing.T) { Value: 1, } - if result.Err != nil { - t.Fatalf("Got unexpected error: %v", result.Err) - } - if !actual.Equal(expected) { t.Fatalf("Expected and actual samples not equal. Expected: %v, actual: %v", expected, actual) } - actual = result.Samples[1] + actual = result[1] expected = &clientmodel.Sample{ Metric: clientmodel.Metric{ clientmodel.MetricNameLabel: scrapeDurationMetricName, @@ -95,10 +89,6 @@ func TestTargetRecordScrapeHealth(t *testing.T) { Value: 2.0, } - if result.Err != nil { - t.Fatalf("Got unexpected error: %v", result.Err) - } - if !actual.Equal(expected) { t.Fatalf("Expected and actual samples not equal. 
Expected: %v, actual: %v", expected, actual) } diff --git a/rules/manager/manager.go b/rules/manager/manager.go index d09a10388..0a5bf65c0 100644 --- a/rules/manager/manager.go +++ b/rules/manager/manager.go @@ -19,7 +19,6 @@ import ( "time" "github.com/golang/glog" - "github.com/prometheus/client_golang/extraction" "github.com/prometheus/client_golang/prometheus" clientmodel "github.com/prometheus/client_golang/model" @@ -95,7 +94,7 @@ type ruleManager struct { interval time.Duration storage local.Storage - results chan<- *extraction.Result + results chan<- clientmodel.Samples notificationHandler *notification.NotificationHandler prometheusURL string @@ -107,7 +106,7 @@ type RuleManagerOptions struct { Storage local.Storage NotificationHandler *notification.NotificationHandler - Results chan<- *extraction.Result + Results chan<- clientmodel.Samples PrometheusURL string } @@ -210,7 +209,7 @@ func (m *ruleManager) queueAlertNotifications(rule *rules.AlertingRule, timestam m.notificationHandler.SubmitReqs(notifications) } -func (m *ruleManager) runIteration(results chan<- *extraction.Result) { +func (m *ruleManager) runIteration(results chan<- clientmodel.Samples) { now := clientmodel.Now() wg := sync.WaitGroup{} @@ -240,11 +239,8 @@ func (m *ruleManager) runIteration(results chan<- *extraction.Result) { if err != nil { evalFailures.Inc() - } - - m.results <- &extraction.Result{ - Samples: samples, - Err: err, + } else { + m.results <- samples } switch r := rule.(type) { From bb1e49383e07c8623c39edf545c8f45f92ab0279 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Thu, 8 Jan 2015 16:57:25 +0100 Subject: [PATCH 3/3] Log rule evaluation errors. 
--- rules/manager/manager.go | 1 + 1 file changed, 1 insertion(+) diff --git a/rules/manager/manager.go b/rules/manager/manager.go index 0a5bf65c0..9f64efcd5 100644 --- a/rules/manager/manager.go +++ b/rules/manager/manager.go @@ -239,6 +239,7 @@ func (m *ruleManager) runIteration(results chan<- clientmodel.Samples) { if err != nil { evalFailures.Inc() + glog.Warningf("Error while evaluating rule %q: %s", rule, err) } else { m.results <- samples }