Export number of missed rule evaluations

In case the execution of all rules takes longer than the configured rule
evaluation interval, one or more iterations will be skipped. This needs
to be visible to the operator.
This commit is contained in:
Tobias Schmidt 2017-04-02 20:03:28 -03:00
parent 5a896033e3
commit aaaba57184

View file

@ -75,10 +75,15 @@ var (
Name: "evaluator_iterations_skipped_total", Name: "evaluator_iterations_skipped_total",
Help: "The total number of rule group evaluations skipped due to throttled metric storage.", Help: "The total number of rule group evaluations skipped due to throttled metric storage.",
}) })
iterationsMissed = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Name: "evaluator_iterations_missed_total",
Help: "The total number of rule group evaluations missed due to slow rule group evaluation.",
})
iterationsScheduled = prometheus.NewCounter(prometheus.CounterOpts{ iterationsScheduled = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace, Namespace: namespace,
Name: "evaluator_iterations_total", Name: "evaluator_iterations_total",
Help: "The total number of scheduled rule group evaluations, whether skipped or executed.", Help: "The total number of scheduled rule group evaluations, whether executed, missed or skipped.",
}) })
) )
@ -90,6 +95,7 @@ func init() {
prometheus.MustRegister(iterationDuration) prometheus.MustRegister(iterationDuration)
prometheus.MustRegister(iterationsSkipped) prometheus.MustRegister(iterationsSkipped)
prometheus.MustRegister(iterationsMissed)
prometheus.MustRegister(evalFailures) prometheus.MustRegister(evalFailures)
prometheus.MustRegister(evalDuration) prometheus.MustRegister(evalDuration)
} }
@ -158,6 +164,7 @@ func (g *Group) run() {
iterationDuration.Observe(time.Since(start).Seconds()) iterationDuration.Observe(time.Since(start).Seconds())
} }
lastTriggered := time.Now()
iter() iter()
tick := time.NewTicker(g.interval) tick := time.NewTicker(g.interval)
@ -172,6 +179,12 @@ func (g *Group) run() {
case <-g.done: case <-g.done:
return return
case <-tick.C: case <-tick.C:
missed := (time.Since(lastTriggered).Nanoseconds() / g.interval.Nanoseconds()) - 1
if missed > 0 {
iterationsMissed.Add(float64(missed))
iterationsScheduled.Add(float64(missed))
}
lastTriggered = time.Now()
iter() iter()
} }
} }