Increase evaluation failures on Commit() (#8770)

I think we should increment the metric here, since we're setting the rule
health anyway. This means that even if the "evaluation" succeeded, none of
the samples made it to storage.

This is a simplified solution to: https://github.com/prometheus/prometheus/pull/8410/

Signed-off-by: Goutham Veeramachaneni <gouthamve@gmail.com>
This commit is contained in:
Goutham Veeramachaneni 2021-04-29 14:28:48 +02:00 committed by GitHub
parent 9d7d818629
commit 2efdf660b1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -595,13 +595,13 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
if err != nil {
rule.SetHealth(HealthBad)
rule.SetLastError(err)
g.metrics.evalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
// Canceled queries are intentional termination of queries. This normally
// happens on shutdown and thus we skip logging of any errors here.
if _, ok := err.(promql.ErrQueryCanceled); !ok {
level.Warn(g.logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err)
}
g.metrics.evalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
return
}
samplesTotal += float64(len(vector))
@ -620,6 +620,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
if err := app.Commit(); err != nil {
rule.SetHealth(HealthBad)
rule.SetLastError(err)
g.metrics.evalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
level.Warn(g.logger).Log("msg", "Rule sample appending failed", "err", err)
return