Merge pull request #13034 from charleskorn/rule-evaluation-logs

Include trace ID in log lines emitted during rule evaluation
This commit is contained in:
Julien Pivotto 2023-10-31 15:53:17 -04:00 committed by GitHub
commit 12dd0cb53b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -430,6 +430,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
} }
func(i int, rule Rule) { func(i int, rule Rule) {
logger := log.WithPrefix(g.logger, "name", rule.Name(), "index", i)
ctx, sp := otel.Tracer("").Start(ctx, "rule") ctx, sp := otel.Tracer("").Start(ctx, "rule")
sp.SetAttributes(attribute.String("name", rule.Name())) sp.SetAttributes(attribute.String("name", rule.Name()))
defer func(t time.Time) { defer func(t time.Time) {
@ -441,6 +442,10 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
rule.SetEvaluationTimestamp(t) rule.SetEvaluationTimestamp(t)
}(time.Now()) }(time.Now())
if sp.SpanContext().IsSampled() && sp.SpanContext().HasTraceID() {
logger = log.WithPrefix(logger, "traceID", sp.SpanContext().TraceID())
}
g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
vector, err := rule.Eval(ctx, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit()) vector, err := rule.Eval(ctx, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit())
@ -454,7 +459,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
// happens on shutdown and thus we skip logging of any errors here. // happens on shutdown and thus we skip logging of any errors here.
var eqc promql.ErrQueryCanceled var eqc promql.ErrQueryCanceled
if !errors.As(err, &eqc) { if !errors.As(err, &eqc) {
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Evaluating rule failed", "rule", rule, "err", err) level.Warn(logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err)
} }
return return
} }
@ -480,7 +485,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
sp.SetStatus(codes.Error, err.Error()) sp.SetStatus(codes.Error, err.Error())
g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc()
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule sample appending failed", "err", err) level.Warn(logger).Log("msg", "Rule sample appending failed", "err", err)
return return
} }
g.seriesInPreviousEval[i] = seriesReturned g.seriesInPreviousEval[i] = seriesReturned
@ -504,15 +509,15 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
switch { switch {
case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample): case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample):
numOutOfOrder++ numOutOfOrder++
level.Debug(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule evaluation result discarded", "err", err, "sample", s) level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
case errors.Is(unwrappedErr, storage.ErrTooOldSample): case errors.Is(unwrappedErr, storage.ErrTooOldSample):
numTooOld++ numTooOld++
level.Debug(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule evaluation result discarded", "err", err, "sample", s) level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
case errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp): case errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp):
numDuplicates++ numDuplicates++
level.Debug(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule evaluation result discarded", "err", err, "sample", s) level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
default: default:
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Rule evaluation result discarded", "err", err, "sample", s) level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s)
} }
} else { } else {
buf := [1024]byte{} buf := [1024]byte{}
@ -520,13 +525,13 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
} }
} }
if numOutOfOrder > 0 { if numOutOfOrder > 0 {
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Error on ingesting out-of-order result from rule evaluation", "numDropped", numOutOfOrder) level.Warn(logger).Log("msg", "Error on ingesting out-of-order result from rule evaluation", "numDropped", numOutOfOrder)
} }
if numTooOld > 0 { if numTooOld > 0 {
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Error on ingesting too old result from rule evaluation", "numDropped", numTooOld) level.Warn(logger).Log("msg", "Error on ingesting too old result from rule evaluation", "numDropped", numTooOld)
} }
if numDuplicates > 0 { if numDuplicates > 0 {
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "numDropped", numDuplicates) level.Warn(logger).Log("msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "numDropped", numDuplicates)
} }
for metric, lset := range g.seriesInPreviousEval[i] { for metric, lset := range g.seriesInPreviousEval[i] {
@ -545,7 +550,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) {
// Do not count these in logging, as this is expected if series // Do not count these in logging, as this is expected if series
// is exposed from a different rule. // is exposed from a different rule.
default: default:
level.Warn(g.logger).Log("name", rule.Name(), "index", i, "msg", "Adding stale sample failed", "sample", lset.String(), "err", err) level.Warn(logger).Log("msg", "Adding stale sample failed", "sample", lset.String(), "err", err)
} }
} }
} }