From d78253319daa62c8f28ed47e40bafcad2dd8b586 Mon Sep 17 00:00:00 2001 From: Piotr <17101802+thampiotr@users.noreply.github.com> Date: Fri, 21 Jun 2024 00:45:13 +0100 Subject: [PATCH] queue_manager: add histogram info to error logs (#14326) Signed-off-by: Piotr Gwizdala <17101802+thampiotr@users.noreply.github.com> --- storage/remote/queue_manager.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index b244b331b..488485e38 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -16,6 +16,7 @@ package remote import ( "context" "errors" + "fmt" "math" "strconv" "sync" @@ -1224,12 +1225,16 @@ func (s *shards) stop() { // Force an unclean shutdown. s.hardShutdown() <-s.done - if dropped := s.samplesDroppedOnHardShutdown.Load(); dropped > 0 { - level.Error(s.qm.logger).Log("msg", "Failed to flush all samples on shutdown", "count", dropped) - } - if dropped := s.exemplarsDroppedOnHardShutdown.Load(); dropped > 0 { - level.Error(s.qm.logger).Log("msg", "Failed to flush all exemplars on shutdown", "count", dropped) + + // Log error for any dropped samples, exemplars, or histograms. + logDroppedError := func(t string, counter atomic.Uint32) { + if dropped := counter.Load(); dropped > 0 { + level.Error(s.qm.logger).Log("msg", fmt.Sprintf("Failed to flush all %s on shutdown", t), "count", dropped) + } } + logDroppedError("samples", s.samplesDroppedOnHardShutdown) + logDroppedError("exemplars", s.exemplarsDroppedOnHardShutdown) + logDroppedError("histograms", s.histogramsDroppedOnHardShutdown) } // enqueue data (sample or exemplar). If the shard is full, shutting down, or @@ -1537,7 +1542,7 @@ func (s *shards) sendSamples(ctx context.Context, samples []prompb.TimeSeries, s begin := time.Now() err := s.sendSamplesWithBackoff(ctx, samples, sampleCount, exemplarCount, histogramCount, pBuf, buf) if err != nil { - level.Error(s.qm.logger).Log("msg", "non-recoverable error", "count", sampleCount, "exemplarCount", exemplarCount, "err", err) + level.Error(s.qm.logger).Log("msg", "non-recoverable error", "count", sampleCount, "exemplarCount", exemplarCount, "histogramCount", histogramCount, "err", err) s.qm.metrics.failedSamplesTotal.Add(float64(sampleCount)) s.qm.metrics.failedExemplarsTotal.Add(float64(exemplarCount)) s.qm.metrics.failedHistogramsTotal.Add(float64(histogramCount))