Introduce telemetry for rule evaluator durations.

This commit adds telemetry for the Prometheus expression rule
evaluator, which will enable meta-Prometheus monitoring of customers
to ensure that no instance is falling behind in answering routine
queries.

A few other sundry simplifications are introduced, too.
This commit is contained in:
Matt T. Proud 2013-05-23 21:29:27 +02:00
parent 8507c58bf2
commit c10780c966
3 changed files with 50 additions and 13 deletions

View file

@ -37,11 +37,6 @@ type groupedAggregation struct {
groupCount int groupCount int
} }
type labelValuePair struct {
label model.LabelName
value model.LabelValue
}
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Enums. // Enums.

View file

@ -47,17 +47,21 @@ func NewRuleManager(results chan *Result, interval time.Duration, storage *metri
interval: interval, interval: interval,
storage: storage, storage: storage,
} }
// BUG(julius): Extract this so that the caller manages concurrency.
go manager.run(results) go manager.run(results)
return manager return manager
} }
func (m *ruleManager) run(results chan *Result) { func (m *ruleManager) run(results chan *Result) {
ticker := time.Tick(m.interval) ticker := time.NewTicker(m.interval)
defer ticker.Stop()
for { for {
select { select {
case <-ticker: case <-ticker.C:
start := time.Now()
m.runIteration(results) m.runIteration(results)
evalDurations.Add(map[string]string{intervalKey: m.interval.String()}, float64(time.Since(start)/time.Millisecond))
case <-m.done: case <-m.done:
log.Printf("RuleManager exiting...") log.Printf("RuleManager exiting...")
break break
@ -66,27 +70,31 @@ func (m *ruleManager) run(results chan *Result) {
} }
func (m *ruleManager) Stop() { func (m *ruleManager) Stop() {
m.done <- true select {
case m.done <- true:
default:
}
} }
func (m *ruleManager) runIteration(results chan *Result) { func (m *ruleManager) runIteration(results chan *Result) {
now := time.Now() now := time.Now()
wg := sync.WaitGroup{} wg := sync.WaitGroup{}
for _, rule := range m.rules { for _, rule := range m.rules {
wg.Add(1) wg.Add(1)
// BUG(julius): Look at fixing thundering herd.
go func(rule Rule) { go func(rule Rule) {
defer wg.Done()
vector, err := rule.Eval(now, m.storage) vector, err := rule.Eval(now, m.storage)
samples := model.Samples{} samples := make(model.Samples, len(vector))
for _, sample := range vector { copy(samples, vector)
samples = append(samples, sample)
}
m.results <- &Result{ m.results <- &Result{
Samples: samples, Samples: samples,
Err: err, Err: err,
} }
wg.Done()
}(rule) }(rule)
} }
wg.Wait() wg.Wait()
} }

34
rules/telemetry.go Normal file
View file

@ -0,0 +1,34 @@
// Copyright 2013 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package rules
import (
"github.com/prometheus/client_golang/prometheus"
)
// intervalKey is the metric label name under which rule-evaluation
// durations are partitioned by the manager's evaluation interval.
const intervalKey = "interval"
var (
	// evalDurations records, per evaluation interval, how long each
	// rule-evaluation pass took in milliseconds. It is fed by the rule
	// manager's run loop and registered for export in init below.
	evalDurations = prometheus.NewHistogram(&prometheus.HistogramSpecification{
		// Buckets sized logarithmically from 0 up to 10000 (ms, matching
		// the unit the run loop reports in).
		Starts: prometheus.LogarithmicSizedBucketsFor(0, 10000),
		// Each bucket holds up to 100 samples; on overflow the oldest 10
		// are evicted and replaced with their average.
		BucketBuilder:         prometheus.AccumulatingBucketBuilder(prometheus.EvictAndReplaceWith(10, prometheus.AverageReducer), 100),
		ReportablePercentiles: []float64{0.01, 0.05, 0.5, 0.90, 0.99}})
	// NOTE(review): evalDuration is never referenced in this commit and is
	// not registered — presumably leftover or reserved for later use; if no
	// other file in package rules uses it, it should be removed. TODO confirm.
	evalDuration = prometheus.NewCounter()
)
// init exports the evaluation-duration histogram under the name
// prometheus_evaluator_duration_ms with no base labels. The "_ms" suffix
// matches the millisecond values the rule manager's run loop feeds into
// evalDurations.
func init() {
	prometheus.Register("prometheus_evaluator_duration_ms", "The duration for each evaluation pool to execute.", prometheus.NilLabels, evalDurations)
}