diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 37efe40036..b3f05b2dcf 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -6842,7 +6842,7 @@ func testOOODisabled(t *testing.T, scenario sampleTypeScenario) { requireEqualSeries(t, expSamples, seriesSet, true) requireEqualOOOSamples(t, 0, db) require.Equal(t, float64(failedSamples), - prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))+prom_testutil.ToFloat64(db.head.metrics.outOfBoundSamples.WithLabelValues(scenario.sampleType)), + prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType))+prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfBounds, scenario.sampleType)), "number of ooo/oob samples mismatch") // Verifying that no OOO artifacts were generated. diff --git a/tsdb/head.go b/tsdb/head.go index 4f4e4febfc..efbedea264 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -363,9 +363,6 @@ type headMetrics struct { gcDuration prometheus.Summary samplesAppended *prometheus.CounterVec outOfOrderSamplesAppended *prometheus.CounterVec - outOfBoundSamples *prometheus.CounterVec - outOfOrderSamples *prometheus.CounterVec - tooOldSamples *prometheus.CounterVec walTruncateDuration prometheus.Summary walCorruptionsTotal prometheus.Counter dataTotalReplayDuration prometheus.Gauge @@ -379,11 +376,15 @@ type headMetrics struct { snapshotReplayErrorTotal prometheus.Counter // Will be either 0 or 1. oooHistogram prometheus.Histogram mmapChunksTotal prometheus.Counter + sampleAppendFailures *prometheus.CounterVec } const ( sampleMetricTypeFloat = "float" sampleMetricTypeHistogram = "histogram" + outOfBounds = "out_of_bounds" + outOfOrder = "out_of_order" + tooOld = "too_old" ) func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { @@ -438,6 +439,10 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { Name: "prometheus_tsdb_data_replay_duration_seconds", Help: "Time taken to replay the data on disk.", }), + sampleAppendFailures: prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "prometheus_tsdb_head_append_failures_total", + Help: "Total number of sample append failures with different reasons.", + }, []string{"reason", "type"}), samplesAppended: prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "prometheus_tsdb_head_samples_appended_total", Help: "Total number of appended samples.", @@ -446,18 +451,6 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { Name: "prometheus_tsdb_head_out_of_order_samples_appended_total", Help: "Total number of appended out of order samples.", }, []string{"type"}), - outOfBoundSamples: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "prometheus_tsdb_out_of_bound_samples_total", - Help: "Total number of out of bound samples ingestion failed attempts with out of order support disabled.", - }, []string{"type"}), - outOfOrderSamples: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "prometheus_tsdb_out_of_order_samples_total", - Help: "Total number of out of order samples ingestion failed attempts due to out of order being disabled.", - }, []string{"type"}), - tooOldSamples: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "prometheus_tsdb_too_old_samples_total", - Help: "Total number of out of order samples ingestion failed attempts with out of support enabled, but sample outside of time window.", - }, []string{"type"}), headTruncateFail: prometheus.NewCounter(prometheus.CounterOpts{ Name: "prometheus_tsdb_head_truncations_failed_total", Help: "Total number of head truncations that failed.", @@ -528,9 +521,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { m.dataTotalReplayDuration, m.samplesAppended, m.outOfOrderSamplesAppended, - m.outOfBoundSamples, - m.outOfOrderSamples, - m.tooOldSamples, + m.sampleAppendFailures, m.headTruncateFail, m.headTruncateTotal, m.checkpointDeleteFail, diff --git a/tsdb/head_append.go b/tsdb/head_append.go index c94c42bc53..7bd19829af 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -343,7 +343,7 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 // Fail fast if OOO is disabled and the sample is out of bounds. // Otherwise a full check will be done later to decide if the sample is in-order or out-of-order. if a.oooTimeWindow == 0 && t < a.minValidTime { - a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + a.head.metrics.sampleAppendFailures.WithLabelValues(outOfBounds, sampleMetricTypeFloat).Inc() return 0, storage.ErrOutOfBounds } @@ -377,7 +377,7 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 isOOO, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow) if err == nil { if isOOO && a.hints != nil && a.hints.DiscardOutOfOrder { - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + a.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, sampleMetricTypeFloat).Inc() return 0, storage.ErrOutOfOrderSample } s.pendingCommit = true @@ -388,9 +388,9 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 if err != nil { switch { case errors.Is(err, storage.ErrOutOfOrderSample): - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + a.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, sampleMetricTypeFloat).Inc() case errors.Is(err, storage.ErrTooOldSample): - a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + a.head.metrics.sampleAppendFailures.WithLabelValues(tooOld, sampleMetricTypeFloat).Inc() } return 0, err } @@ -655,7 +655,7 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels // Fail fast if OOO is disabled and the sample is out of bounds. // Otherwise a full check will be done later to decide if the sample is in-order or out-of-order. if (a.oooTimeWindow == 0 || !a.head.opts.EnableOOONativeHistograms.Load()) && t < a.minValidTime { - a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() + a.head.metrics.sampleAppendFailures.WithLabelValues(outOfBounds, sampleMetricTypeHistogram).Inc() return 0, storage.ErrOutOfBounds } @@ -707,9 +707,9 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels case errors.Is(err, storage.ErrOutOfOrderSample): fallthrough case errors.Is(err, storage.ErrOOONativeHistogramsDisabled): - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() + a.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, sampleMetricTypeHistogram).Inc() case errors.Is(err, storage.ErrTooOldSample): - a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() + a.head.metrics.sampleAppendFailures.WithLabelValues(tooOld, sampleMetricTypeHistogram).Inc() } return 0, err } @@ -744,9 +744,9 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels case errors.Is(err, storage.ErrOutOfOrderSample): fallthrough case errors.Is(err, storage.ErrOOONativeHistogramsDisabled): - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() + a.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, sampleMetricTypeHistogram).Inc() case errors.Is(err, storage.ErrTooOldSample): - a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() + a.head.metrics.sampleAppendFailures.WithLabelValues(tooOld, sampleMetricTypeHistogram).Inc() } return 0, err } @@ -1491,10 +1491,10 @@ func (a *headAppender) Commit() (err error) { a.commitFloatHistograms(acc) a.commitMetadata() - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOORejected)) - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histoOOORejected)) - a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOBRejected)) - a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatTooOldRejected)) + a.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, sampleMetricTypeHistogram).Add(float64(acc.histoOOORejected)) + a.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, sampleMetricTypeFloat).Add(float64(acc.floatOOORejected)) + a.head.metrics.sampleAppendFailures.WithLabelValues(outOfBounds, sampleMetricTypeFloat).Add(float64(acc.floatOOBRejected)) + a.head.metrics.sampleAppendFailures.WithLabelValues(tooOld, sampleMetricTypeFloat).Add(float64(acc.floatTooOldRejected)) a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatsAppended)) a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histogramsAppended)) a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.oooFloatsAccepted)) diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 695428619f..3a9391cefe 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -2854,19 +2854,19 @@ func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario, opti require.NoError(t, app.Commit()) // Test out of order metric. - require.Equal(t, 0.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) + require.Equal(t, 0.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType))) app = db.Appender(ctx) _, err = appendSample(app, 2) require.Equal(t, expectOutOfOrderError, err) - require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) + require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType))) _, err = appendSample(app, 3) require.Equal(t, expectOutOfOrderError, err) - require.Equal(t, 2.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) + require.Equal(t, 2.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType))) _, err = appendSample(app, 4) require.Equal(t, expectOutOfOrderError, err) - require.Equal(t, 3.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) + require.Equal(t, 3.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType))) require.NoError(t, app.Commit()) // Compact Head to test out of bound metric. @@ -2882,11 +2882,11 @@ func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario, opti app = db.Appender(ctx) _, err = appendSample(app, db.head.minValidTime.Load()-2) require.Equal(t, storage.ErrOutOfBounds, err) - require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.outOfBoundSamples.WithLabelValues(scenario.sampleType))) + require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfBounds, scenario.sampleType))) _, err = appendSample(app, db.head.minValidTime.Load()-1) require.Equal(t, storage.ErrOutOfBounds, err) - require.Equal(t, 2.0, prom_testutil.ToFloat64(db.head.metrics.outOfBoundSamples.WithLabelValues(scenario.sampleType))) + require.Equal(t, 2.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfBounds, scenario.sampleType))) require.NoError(t, app.Commit()) // Some more valid samples for out of order. @@ -2901,15 +2901,15 @@ func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario, opti app = db.Appender(ctx) _, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+2) require.Equal(t, expectOutOfOrderError, err) - require.Equal(t, 4.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) + require.Equal(t, 4.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType))) _, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+3) require.Equal(t, expectOutOfOrderError, err) - require.Equal(t, 5.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) + require.Equal(t, 5.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType))) _, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+4) require.Equal(t, expectOutOfOrderError, err) - require.Equal(t, 6.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) + require.Equal(t, 6.0, prom_testutil.ToFloat64(db.head.metrics.sampleAppendFailures.WithLabelValues(outOfOrder, scenario.sampleType))) require.NoError(t, app.Commit()) } @@ -4474,7 +4474,7 @@ func TestHistogramMetrics(t *testing.T) { require.NoError(t, err) require.NoError(t, head.Init(0)) - require.Equal(t, float64(0), prom_testutil.ToFloat64(head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram))) // Counter reset. + require.Equal(t, float64(0), prom_testutil.ToFloat64(head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram))) } func TestHistogramStaleSample(t *testing.T) {