Merge "add evalDuration histogram and ruleCount counter for rules"

This commit is contained in:
Julius Volz 2013-12-11 22:52:19 +01:00 committed by Gerrit Code Review
commit 3bf3a555b2
2 changed files with 33 additions and 8 deletions

View file

@ -14,6 +14,7 @@
package rules
import (
"fmt"
"sync"
"time"
@ -89,7 +90,7 @@ func (m *ruleManager) Run() {
case <-ticker.C:
start := time.Now()
m.runIteration(m.results)
evalDurations.Add(map[string]string{intervalKey: m.interval.String()}, float64(time.Since(start)/time.Millisecond))
iterationDuration.Add(map[string]string{intervalLabel: m.interval.String()}, float64(time.Since(start)/time.Millisecond))
case <-m.done:
glog.Info("Rule manager exiting...")
return
@ -146,7 +147,11 @@ func (m *ruleManager) runIteration(results chan<- *extraction.Result) {
// BUG(julius): Look at fixing thundering herd.
go func(rule Rule) {
defer wg.Done()
start := time.Now()
vector, err := rule.Eval(now, m.storage)
duration := time.Since(start)
samples := make(clientmodel.Samples, len(vector))
copy(samples, vector)
m.results <- &extraction.Result{
@ -154,8 +159,14 @@ func (m *ruleManager) runIteration(results chan<- *extraction.Result) {
Err: err,
}
if alertingRule, ok := rule.(*AlertingRule); ok {
m.queueAlertNotifications(alertingRule)
switch r := rule.(type) {
case *AlertingRule:
m.queueAlertNotifications(r)
recordOutcome(alertingRuleType, duration)
case *RecordingRule:
recordOutcome(recordingRuleType, duration)
default:
panic(fmt.Sprintf("Unknown rule type: %T", rule))
}
}(rule)
}

View file

@ -14,21 +14,35 @@
package rules
import (
"time"
"github.com/prometheus/client_golang/prometheus"
)
const (
// intervalKey is a label name; Run uses it to attach the manager's
// interval (m.interval.String()) to a duration observation.
intervalKey = "interval"
// intervalLabel carries the same value as intervalKey and is used by Run
// in the same way for the iterationDuration metric.
intervalLabel = "interval"
// ruleTypeLabel is the label name recordOutcome uses to tag its metrics
// with the kind of rule that was evaluated.
ruleTypeLabel = "rule_type"
// alertingRuleType and recordingRuleType are the ruleTypeLabel values
// passed to recordOutcome for *AlertingRule and *RecordingRule rules.
alertingRuleType = "alerting"
recordingRuleType = "recording"
)
var (
evalDurations = prometheus.NewHistogram(&prometheus.HistogramSpecification{
evalDuration = prometheus.NewDefaultHistogram()
evalCount = prometheus.NewCounter()
iterationDuration = prometheus.NewHistogram(&prometheus.HistogramSpecification{
Starts: prometheus.LogarithmicSizedBucketsFor(0, 10000),
BucketBuilder: prometheus.AccumulatingBucketBuilder(prometheus.EvictAndReplaceWith(10, prometheus.AverageReducer), 100),
ReportablePercentiles: []float64{0.01, 0.05, 0.5, 0.90, 0.99}})
evalDuration = prometheus.NewCounter()
)
func init() {
prometheus.Register("prometheus_evaluator_duration_ms", "The duration for each evaluation pool to execute.", prometheus.NilLabels, evalDurations)
func recordOutcome(ruleType string, duration time.Duration) {
millisecondDuration := float64(duration / time.Millisecond)
evalCount.Increment(map[string]string{ruleTypeLabel: ruleType})
evalDuration.Add(map[string]string{ruleTypeLabel: ruleType}, millisecondDuration)
}
// init registers this package's rule metrics with the prometheus client
// library. Each metric is registered under its exported name with a help
// string and prometheus.NilLabels as base labels.
func init() {
// iterationDuration: fed by Run with the time one runIteration call took.
prometheus.Register("prometheus_evaluator_duration_ms", "The duration for each evaluation pool to execute.", prometheus.NilLabels, iterationDuration)
// evalDuration: fed by recordOutcome with the per-rule evaluation time.
prometheus.Register("prometheus_rule_evaluation_duration_ms", "The duration for a rule to execute.", prometheus.NilLabels, evalDuration)
// evalCount: incremented by recordOutcome once per evaluated rule.
prometheus.Register("prometheus_rule_evaluation_count", "The number of rules evaluated.", prometheus.NilLabels, evalCount)
}