queue_manager: add histogram info to error logs (#14326)

Signed-off-by: Piotr Gwizdala <17101802+thampiotr@users.noreply.github.com>
This commit is contained in:
Piotr 2024-06-21 00:45:13 +01:00 committed by GitHub
parent dbd29df5df
commit d78253319d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -16,6 +16,7 @@ package remote
import ( import (
"context" "context"
"errors" "errors"
"fmt"
"math" "math"
"strconv" "strconv"
"sync" "sync"
@ -1224,12 +1225,16 @@ func (s *shards) stop() {
// Force an unclean shutdown. // Force an unclean shutdown.
s.hardShutdown() s.hardShutdown()
<-s.done <-s.done
if dropped := s.samplesDroppedOnHardShutdown.Load(); dropped > 0 {
level.Error(s.qm.logger).Log("msg", "Failed to flush all samples on shutdown", "count", dropped) // Log error for any dropped samples, exemplars, or histograms.
} logDroppedError := func(t string, counter atomic.Uint32) {
if dropped := s.exemplarsDroppedOnHardShutdown.Load(); dropped > 0 { if dropped := counter.Load(); dropped > 0 {
level.Error(s.qm.logger).Log("msg", "Failed to flush all exemplars on shutdown", "count", dropped) level.Error(s.qm.logger).Log("msg", fmt.Sprintf("Failed to flush all %s on shutdown", t), "count", dropped)
}
} }
logDroppedError("samples", s.samplesDroppedOnHardShutdown)
logDroppedError("exemplars", s.exemplarsDroppedOnHardShutdown)
logDroppedError("histograms", s.histogramsDroppedOnHardShutdown)
} }
// enqueue data (sample or exemplar). If the shard is full, shutting down, or // enqueue data (sample or exemplar). If the shard is full, shutting down, or
@ -1537,7 +1542,7 @@ func (s *shards) sendSamples(ctx context.Context, samples []prompb.TimeSeries, s
begin := time.Now() begin := time.Now()
err := s.sendSamplesWithBackoff(ctx, samples, sampleCount, exemplarCount, histogramCount, pBuf, buf) err := s.sendSamplesWithBackoff(ctx, samples, sampleCount, exemplarCount, histogramCount, pBuf, buf)
if err != nil { if err != nil {
level.Error(s.qm.logger).Log("msg", "non-recoverable error", "count", sampleCount, "exemplarCount", exemplarCount, "err", err) level.Error(s.qm.logger).Log("msg", "non-recoverable error", "count", sampleCount, "exemplarCount", exemplarCount, "histogramCount", histogramCount, "err", err)
s.qm.metrics.failedSamplesTotal.Add(float64(sampleCount)) s.qm.metrics.failedSamplesTotal.Add(float64(sampleCount))
s.qm.metrics.failedExemplarsTotal.Add(float64(exemplarCount)) s.qm.metrics.failedExemplarsTotal.Add(float64(exemplarCount))
s.qm.metrics.failedHistogramsTotal.Add(float64(histogramCount)) s.qm.metrics.failedHistogramsTotal.Add(float64(histogramCount))