Merge pull request #13034 from charleskorn/rule-evaluation-logs

Include trace ID in log lines emitted during rule evaluation
This commit is contained in:
Julien Pivotto 2023-10-31 15:53:17 -04:00 committed by GitHub
commit 12dd0cb53b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -430,6 +430,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
}
func(i int, rule Rule) {
logger := log.WithPrefix(g.logger, "name", rule.Name(), "index", i)
ctx, sp := otel.Tracer("").Start(ctx, "rule")
sp.SetAttributes(attribute.String("name", rule.Name()))
defer func(t time.Time) {
@ -441,6 +442,10 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
rule.SetEvaluationTimestamp(t)
}(time.Now())
if sp.SpanContext().IsSampled() && sp.SpanContext().HasTraceID() {
logger = log.WithPrefix(logger, "traceID", sp.SpanContext().TraceID())
}
g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
vector, err := rule.Eval(ctx, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit())
@ -454,7 +459,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
// happens on shutdown and thus we skip logging of any errors here.
var eqc promql.ErrQueryCanceled
if !errors.As(err, &eqc) {
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Evaluating rule failed", "rule", rule, "err", err)
level.Warn(logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err)
}
return
}
@ -480,7 +485,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
sp.SetStatus(codes.Error, err.Error())
g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule sample appending failed", "err", err)
level.Warn(logger).Log("msg", "Rule sample appending failed", "err", err)
return
}
g.seriesInPreviousEval[i] = seriesReturned
@ -504,15 +509,15 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
switch {
case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample):
numOutOfOrder++
level.Debug(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule evaluation result discarded", "err", err, "sample", s)
level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
case errors.Is(unwrappedErr, storage.ErrTooOldSample):
numTooOld++
level.Debug(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule evaluation result discarded", "err", err, "sample", s)
level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
case errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
numDuplicates++
level.Debug(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule evaluation result discarded", "err", err, "sample", s)
level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
default:
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule evaluation result discarded", "err", err, "sample", s)
level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
}
} else {
buf := [1024]byte{}
@ -520,13 +525,13 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
}
}
if numOutOfOrder > 0 {
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Error on ingesting out-of-order result from rule evaluation", "numDropped", numOutOfOrder)
level.Warn(logger).Log("msg", "Error on ingesting out-of-order result from rule evaluation", "numDropped", numOutOfOrder)
}
if numTooOld > 0 {
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Error on ingesting too old result from rule evaluation", "numDropped", numTooOld)
level.Warn(logger).Log("msg", "Error on ingesting too old result from rule evaluation", "numDropped", numTooOld)
}
if numDuplicates > 0 {
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "numDropped", numDuplicates)
level.Warn(logger).Log("msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "numDropped", numDuplicates)
}
for metric, lset := range g.seriesInPreviousEval[i] {
@ -545,7 +550,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
// Do not count these in logging, as this is expected if series
// is exposed from a different rule.
default:
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Adding stale sample failed", "sample", lset.String(), "err", err)
level.Warn(logger).Log("msg", "Adding stale sample failed", "sample", lset.String(), "err", err)
}
}
}