This commit is contained in:
Aman 2025-03-05 21:34:37 +01:00 committed by GitHub
commit 1cf1c33628
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 33 additions and 42 deletions

View file

@ -6842,7 +6842,7 @@ func testOOODisabled(t *testing.T, scenario sampleTypeScenario) {
requireEqualSeries(t, expSamples, seriesSet, true) requireEqualSeries(t, expSamples, seriesSet, true)
requireEqualOOOSamples(t, 0, db) requireEqualOOOSamples(t, 0, db)
require.Equal(t, float64(failedSamples), require.Equal(t, float64(failedSamples),
prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))+prom_testutil.ToFloat64(db.head.metrics.outOfBoundSamples.WithLabelValues(scenario.sampleType)), prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType))+prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfBounds, scenario.sampleType)),
"number of ooo/oob samples mismatch") "number of ooo/oob samples mismatch")
// Verifying that no OOO artifacts were generated. // Verifying that no OOO artifacts were generated.

View file

@ -363,9 +363,6 @@ type headMetrics struct {
gcDuration prometheus.Summary gcDuration prometheus.Summary
samplesAppended *prometheus.CounterVec samplesAppended *prometheus.CounterVec
outOfOrderSamplesAppended *prometheus.CounterVec outOfOrderSamplesAppended *prometheus.CounterVec
outOfBoundSamples *prometheus.CounterVec
outOfOrderSamples *prometheus.CounterVec
tooOldSamples *prometheus.CounterVec
walTruncateDuration prometheus.Summary walTruncateDuration prometheus.Summary
walCorruptionsTotal prometheus.Counter walCorruptionsTotal prometheus.Counter
dataTotalReplayDuration prometheus.Gauge dataTotalReplayDuration prometheus.Gauge
@ -379,11 +376,15 @@ type headMetrics struct {
snapshotReplayErrorTotal prometheus.Counter // Will be either 0 or 1. snapshotReplayErrorTotal prometheus.Counter // Will be either 0 or 1.
oooHistogram prometheus.Histogram oooHistogram prometheus.Histogram
mmapChunksTotal prometheus.Counter mmapChunksTotal prometheus.Counter
sampleAppendFailures *prometheus.CounterVec
} }
const ( const (
sampleMetricTypeFloat = "float" sampleMetricTypeFloat = "float"
sampleMetricTypeHistogram = "histogram" sampleMetricTypeHistogram = "histogram"
outOfBounds = "out_of_bounds"
outOfOrder = "out_of_order"
tooOld = "too_old"
) )
func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
@ -438,6 +439,10 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
Name: "prometheus_tsdb_data_replay_duration_seconds", Name: "prometheus_tsdb_data_replay_duration_seconds",
Help: "Time taken to replay the data on disk.", Help: "Time taken to replay the data on disk.",
}), }),
sampleAppendFailures: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "prometheus_tsdb_head_append_failures_total",
Help: "Total number of sample append failures with different reasons.",
}, []string{"reason", "type"}),
samplesAppended: prometheus.NewCounterVec(prometheus.CounterOpts{ samplesAppended: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "prometheus_tsdb_head_samples_appended_total", Name: "prometheus_tsdb_head_samples_appended_total",
Help: "Total number of appended samples.", Help: "Total number of appended samples.",
@ -446,18 +451,6 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
Name: "prometheus_tsdb_head_out_of_order_samples_appended_total", Name: "prometheus_tsdb_head_out_of_order_samples_appended_total",
Help: "Total number of appended out of order samples.", Help: "Total number of appended out of order samples.",
}, []string{"type"}), }, []string{"type"}),
outOfBoundSamples: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "prometheus_tsdb_out_of_bound_samples_total",
Help: "Total number of out of bound samples ingestion failed attempts with out of order support disabled.",
}, []string{"type"}),
outOfOrderSamples: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "prometheus_tsdb_out_of_order_samples_total",
Help: "Total number of out of order samples ingestion failed attempts due to out of order being disabled.",
}, []string{"type"}),
tooOldSamples: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "prometheus_tsdb_too_old_samples_total",
Help: "Total number of out of order samples ingestion failed attempts with out of support enabled, but sample outside of time window.",
}, []string{"type"}),
headTruncateFail: prometheus.NewCounter(prometheus.CounterOpts{ headTruncateFail: prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_head_truncations_failed_total", Name: "prometheus_tsdb_head_truncations_failed_total",
Help: "Total number of head truncations that failed.", Help: "Total number of head truncations that failed.",
@ -528,9 +521,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics {
m.dataTotalReplayDuration, m.dataTotalReplayDuration,
m.samplesAppended, m.samplesAppended,
m.outOfOrderSamplesAppended, m.outOfOrderSamplesAppended,
m.outOfBoundSamples, m.sampleAppendFailures,
m.outOfOrderSamples,
m.tooOldSamples,
m.headTruncateFail, m.headTruncateFail,
m.headTruncateTotal, m.headTruncateTotal,
m.checkpointDeleteFail, m.checkpointDeleteFail,

View file

@ -343,7 +343,7 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64
// Fail fast if OOO is disabled and the sample is out of bounds. // Fail fast if OOO is disabled and the sample is out of bounds.
// Otherwise a full check will be done later to decide if the sample is in-order or out-of-order. // Otherwise a full check will be done later to decide if the sample is in-order or out-of-order.
if a.oooTimeWindow == 0 && t < a.minValidTime { if a.oooTimeWindow == 0 && t < a.minValidTime {
a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Inc() a.head.metrics.sampleAppendFailures.WithLabelValues(outOfBounds, sampleMetricTypeFloat).Inc()
return 0, storage.ErrOutOfBounds return 0, storage.ErrOutOfBounds
} }
@ -377,7 +377,7 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64
isOOO, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow) isOOO, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow)
if err == nil { if err == nil {
if isOOO && a.hints != nil && a.hints.DiscardOutOfOrder { if isOOO && a.hints != nil && a.hints.DiscardOutOfOrder {
a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Inc() a.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, sampleMetricTypeFloat).Inc()
return 0, storage.ErrOutOfOrderSample return 0, storage.ErrOutOfOrderSample
} }
s.pendingCommit = true s.pendingCommit = true
@ -388,9 +388,9 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64
if err != nil { if err != nil {
switch { switch {
case errors.Is(err, storage.ErrOutOfOrderSample): case errors.Is(err, storage.ErrOutOfOrderSample):
a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Inc() a.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, sampleMetricTypeFloat).Inc()
case errors.Is(err, storage.ErrTooOldSample): case errors.Is(err, storage.ErrTooOldSample):
a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Inc() a.head.metrics.sampleAppendFailures.WithLabelValues(tooOld, sampleMetricTypeFloat).Inc()
} }
return 0, err return 0, err
} }
@ -655,7 +655,7 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
// Fail fast if OOO is disabled and the sample is out of bounds. // Fail fast if OOO is disabled and the sample is out of bounds.
// Otherwise a full check will be done later to decide if the sample is in-order or out-of-order. // Otherwise a full check will be done later to decide if the sample is in-order or out-of-order.
if (a.oooTimeWindow == 0 || !a.head.opts.EnableOOONativeHistograms.Load()) && t < a.minValidTime { if (a.oooTimeWindow == 0 || !a.head.opts.EnableOOONativeHistograms.Load()) && t < a.minValidTime {
a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() a.head.metrics.sampleAppendFailures.WithLabelValues(outOfBounds, sampleMetricTypeHistogram).Inc()
return 0, storage.ErrOutOfBounds return 0, storage.ErrOutOfBounds
} }
@ -707,9 +707,9 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
case errors.Is(err, storage.ErrOutOfOrderSample): case errors.Is(err, storage.ErrOutOfOrderSample):
fallthrough fallthrough
case errors.Is(err, storage.ErrOOONativeHistogramsDisabled): case errors.Is(err, storage.ErrOOONativeHistogramsDisabled):
a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() a.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, sampleMetricTypeHistogram).Inc()
case errors.Is(err, storage.ErrTooOldSample): case errors.Is(err, storage.ErrTooOldSample):
a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() a.head.metrics.sampleAppendFailures.WithLabelValues(tooOld, sampleMetricTypeHistogram).Inc()
} }
return 0, err return 0, err
} }
@ -744,9 +744,9 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
case errors.Is(err, storage.ErrOutOfOrderSample): case errors.Is(err, storage.ErrOutOfOrderSample):
fallthrough fallthrough
case errors.Is(err, storage.ErrOOONativeHistogramsDisabled): case errors.Is(err, storage.ErrOOONativeHistogramsDisabled):
a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() a.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, sampleMetricTypeHistogram).Inc()
case errors.Is(err, storage.ErrTooOldSample): case errors.Is(err, storage.ErrTooOldSample):
a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() a.head.metrics.sampleAppendFailures.WithLabelValues(tooOld, sampleMetricTypeHistogram).Inc()
} }
return 0, err return 0, err
} }
@ -1491,10 +1491,10 @@ func (a *headAppender) Commit() (err error) {
a.commitFloatHistograms(acc) a.commitFloatHistograms(acc)
a.commitMetadata() a.commitMetadata()
a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOORejected)) a.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, sampleMetricTypeHistogram).Add(float64(acc.histoOOORejected))
a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histoOOORejected)) a.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, sampleMetricTypeFloat).Add(float64(acc.floatOOORejected))
a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOBRejected)) a.head.metrics.sampleAppendFailures.WithLabelValues(outOfBounds, sampleMetricTypeFloat).Add(float64(acc.floatOOBRejected))
a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatTooOldRejected)) a.head.metrics.sampleAppendFailures.WithLabelValues(tooOld, sampleMetricTypeFloat).Add(float64(acc.floatTooOldRejected))
a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatsAppended)) a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatsAppended))
a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histogramsAppended)) a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histogramsAppended))
a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.oooFloatsAccepted)) a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.oooFloatsAccepted))

View file

@ -2854,19 +2854,19 @@ func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario, opti
require.NoError(t, app.Commit()) require.NoError(t, app.Commit())
// Test out of order metric. // Test out of order metric.
require.Equal(t, 0.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) require.Equal(t, 0.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType)))
app = db.Appender(ctx) app = db.Appender(ctx)
_, err = appendSample(app, 2) _, err = appendSample(app, 2)
require.Equal(t, expectOutOfOrderError, err) require.Equal(t, expectOutOfOrderError, err)
require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType)))
_, err = appendSample(app, 3) _, err = appendSample(app, 3)
require.Equal(t, expectOutOfOrderError, err) require.Equal(t, expectOutOfOrderError, err)
require.Equal(t, 2.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) require.Equal(t, 2.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType)))
_, err = appendSample(app, 4) _, err = appendSample(app, 4)
require.Equal(t, expectOutOfOrderError, err) require.Equal(t, expectOutOfOrderError, err)
require.Equal(t, 3.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) require.Equal(t, 3.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType)))
require.NoError(t, app.Commit()) require.NoError(t, app.Commit())
// Compact Head to test out of bound metric. // Compact Head to test out of bound metric.
@ -2882,11 +2882,11 @@ func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario, opti
app = db.Appender(ctx) app = db.Appender(ctx)
_, err = appendSample(app, db.head.minValidTime.Load()-2) _, err = appendSample(app, db.head.minValidTime.Load()-2)
require.Equal(t, storage.ErrOutOfBounds, err) require.Equal(t, storage.ErrOutOfBounds, err)
require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.outOfBoundSamples.WithLabelValues(scenario.sampleType))) require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfBounds, scenario.sampleType)))
_, err = appendSample(app, db.head.minValidTime.Load()-1) _, err = appendSample(app, db.head.minValidTime.Load()-1)
require.Equal(t, storage.ErrOutOfBounds, err) require.Equal(t, storage.ErrOutOfBounds, err)
require.Equal(t, 2.0, prom_testutil.ToFloat64(db.head.metrics.outOfBoundSamples.WithLabelValues(scenario.sampleType))) require.Equal(t, 2.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfBounds, scenario.sampleType)))
require.NoError(t, app.Commit()) require.NoError(t, app.Commit())
// Some more valid samples for out of order. // Some more valid samples for out of order.
@ -2901,15 +2901,15 @@ func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario, opti
app = db.Appender(ctx) app = db.Appender(ctx)
_, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+2) _, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+2)
require.Equal(t, expectOutOfOrderError, err) require.Equal(t, expectOutOfOrderError, err)
require.Equal(t, 4.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) require.Equal(t, 4.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType)))
_, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+3) _, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+3)
require.Equal(t, expectOutOfOrderError, err) require.Equal(t, expectOutOfOrderError, err)
require.Equal(t, 5.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) require.Equal(t, 5.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType)))
_, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+4) _, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+4)
require.Equal(t, expectOutOfOrderError, err) require.Equal(t, expectOutOfOrderError, err)
require.Equal(t, 6.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) require.Equal(t, 6.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType)))
require.NoError(t, app.Commit()) require.NoError(t, app.Commit())
} }
@ -4474,7 +4474,7 @@ func TestHistogramMetrics(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
require.NoError(t, head.Init(0)) require.NoError(t, head.Init(0))
require.Equal(t, float64(0), prom_testutil.ToFloat64(head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram))) // Counter reset. require.Equal(t, float64(0), prom_testutil.ToFloat64(head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram)))
} }
func TestHistogramStaleSample(t *testing.T) { func TestHistogramStaleSample(t *testing.T) {