From f4cd11318c9504eae47d9b543e20b56128bc0c4f Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Tue, 12 Dec 2023 18:46:59 +0100 Subject: [PATCH] PostingsForMatchersCache: don't create new spans, reduce logging verbosity Creating a new span for each promQL matcher for each query can end up being too verbose to navigate in grafana. This PR replaces the span with a trace log. Overall starting a span was also expensive - according to profiles it's about 9% of the CPU of PostingsForMatchers and it's about 25% of the memory allocations. Adding events is about 2% of the CPU and ~5% of the allocations. In addition to that it removes some log lines in `postingsForMatchersPromise` because I think they just log the same thing multiple times. I also removed some calls to AddEvent to amortize the cost of tracing further. --- tsdb/postings_for_matchers_cache.go | 41 +++++++++-------------------- 1 file changed, 12 insertions(+), 29 deletions(-) diff --git a/tsdb/postings_for_matchers_cache.go b/tsdb/postings_for_matchers_cache.go index bb4aba661e..c09d28fce5 100644 --- a/tsdb/postings_for_matchers_cache.go +++ b/tsdb/postings_for_matchers_cache.go @@ -3,6 +3,7 @@ package tsdb import ( "container/list" "context" + "fmt" "strings" "sync" "time" @@ -87,12 +88,13 @@ type PostingsForMatchersCache struct { } func (c *PostingsForMatchersCache) PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, concurrent bool, ms ...*labels.Matcher) (index.Postings, error) { - ctx, span := c.tracer.Start(ctx, "PostingsForMatchersCache.PostingsForMatchers", trace.WithAttributes( - attribute.Bool("concurrent", concurrent), - c.ttlAttrib, - c.forceAttrib, - )) - defer span.End() + span := trace.SpanFromContext(ctx) + defer func(startTime time.Time) { + span.AddEvent( + "returned postings", + trace.WithAttributes(attribute.Bool("concurrent", concurrent), c.ttlAttrib, c.forceAttrib, attribute.Stringer("duration", time.Since(startTime))), + ) + }(time.Now()) if !concurrent && !c.force { span.AddEvent("cache not used") @@ -122,30 +124,18 @@ type postingsForMatcherPromise struct { } func (p *postingsForMatcherPromise) result(ctx context.Context) (index.Postings, error) { - span := trace.SpanFromContext(ctx) - select { case <-ctx.Done(): - span.AddEvent("interrupting wait on postingsForMatchers promise due to context error", trace.WithAttributes( - attribute.String("err", ctx.Err().Error()), - )) - return nil, ctx.Err() + return nil, fmt.Errorf("interrupting wait on postingsForMatchers promise due to context error: %w", ctx.Err()) case <-p.done: // Checking context error is necessary for deterministic tests, // as channel selection order is random if ctx.Err() != nil { - span.AddEvent("completed postingsForMatchers promise, but context has error", trace.WithAttributes( - attribute.String("err", ctx.Err().Error()), - )) - return nil, ctx.Err() + return nil, fmt.Errorf("completed postingsForMatchers promise, but context has error: %w", ctx.Err()) } if p.err != nil { - span.AddEvent("postingsForMatchers promise completed with error", trace.WithAttributes( - attribute.String("err", p.err.Error()), - )) - return nil, p.err + return nil, fmt.Errorf("postingsForMatchers promise completed with error: %w", p.err) } - span.AddEvent("postingsForMatchers promise completed successfully") return p.cloner.Clone(), nil } } @@ -168,7 +158,7 @@ func (c *PostingsForMatchersCache) postingsForMatchersPromise(ctx context.Contex return oldPromise.(*postingsForMatcherPromise).result } - span.AddEvent("no postingsForMatchers promise in cache, executing query") + span.AddEvent("no postingsForMatchers promise in cache, executing query", trace.WithAttributes(attribute.String("cache_key", key))) // promise was stored, close its channel after fulfilment defer close(promise.done) @@ -178,15 +168,8 @@ func (c *PostingsForMatchersCache) postingsForMatchersPromise(ctx context.Contex // FIXME: do we need to cancel the call to postingsForMatchers if all the callers waiting for the result have // cancelled their context? if postings, err := c.postingsForMatchers(context.Background(), ix, ms...); err != nil { - span.AddEvent("postingsForMatchers failed", trace.WithAttributes( - attribute.String("cache_key", key), - attribute.String("err", err.Error()), - )) promise.err = err } else { - span.AddEvent("postingsForMatchers succeeded", trace.WithAttributes( - attribute.String("cache_key", key), - )) promise.cloner = index.NewPostingsCloner(postings) }