From ff3d4e91dcf8c06ee9206853cd1e1e30c4c52bb3 Mon Sep 17 00:00:00 2001 From: Nolwenn Cauchois Date: Fri, 20 May 2022 12:35:16 +0200 Subject: [PATCH 01/66] mixin: Use url filter on Remote Write dashboard Signed-off-by: Nolwenn Cauchois --- .../prometheus-mixin/dashboards.libsonnet | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/documentation/prometheus-mixin/dashboards.libsonnet b/documentation/prometheus-mixin/dashboards.libsonnet index b95f13e0a0..8d4ff61155 100644 --- a/documentation/prometheus-mixin/dashboards.libsonnet +++ b/documentation/prometheus-mixin/dashboards.libsonnet @@ -117,7 +117,7 @@ local template = grafana.template; ( prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~"$cluster", instance=~"$instance"} - - ignoring(remote_name, url) group_right(instance) (prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~"$cluster", instance=~"$instance"} != 0) + ignoring(remote_name, url) group_right(instance) (prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~"$cluster", instance=~"$instance", url=~"$url"} != 0) ) |||, legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}', @@ -134,7 +134,7 @@ local template = grafana.template; clamp_min( rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~"$cluster", instance=~"$instance"}[5m]) - - ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~"$cluster", instance=~"$instance"}[5m]) + ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~"$cluster", instance=~"$instance", url=~"$url"}[5m]) , 0) |||, legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}', @@ -151,9 +151,9 @@ local template = grafana.template; rate( prometheus_remote_storage_samples_in_total{cluster=~"$cluster", instance=~"$instance"}[5m]) - - ignoring(remote_name, url) 
group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])) + ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~"$cluster", instance=~"$instance", url=~"$url"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~"$cluster", instance=~"$instance", url=~"$url"}[5m])) - - (rate(prometheus_remote_storage_dropped_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~"$cluster", instance=~"$instance"}[5m])) + (rate(prometheus_remote_storage_dropped_samples_total{cluster=~"$cluster", instance=~"$instance", url=~"$url"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~"$cluster", instance=~"$instance", url=~"$url"}[5m])) |||, legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}' )); @@ -166,7 +166,7 @@ local template = grafana.template; min_span=6, ) .addTarget(prometheus.target( - 'prometheus_remote_storage_shards{cluster=~"$cluster", instance=~"$instance"}', + 'prometheus_remote_storage_shards{cluster=~"$cluster", instance=~"$instance", url=~"$url"}', legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}' )); @@ -177,7 +177,7 @@ local template = grafana.template; span=4, ) .addTarget(prometheus.target( - 'prometheus_remote_storage_shards_max{cluster=~"$cluster", instance=~"$instance"}', + 'prometheus_remote_storage_shards_max{cluster=~"$cluster", instance=~"$instance", url=~"$url"}', legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}' )); @@ -188,7 +188,7 @@ local template = grafana.template; span=4, ) .addTarget(prometheus.target( - 'prometheus_remote_storage_shards_min{cluster=~"$cluster", instance=~"$instance"}', + 'prometheus_remote_storage_shards_min{cluster=~"$cluster", instance=~"$instance", url=~"$url"}', 
legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}' )); @@ -199,7 +199,7 @@ local template = grafana.template; span=4, ) .addTarget(prometheus.target( - 'prometheus_remote_storage_shards_desired{cluster=~"$cluster", instance=~"$instance"}', + 'prometheus_remote_storage_shards_desired{cluster=~"$cluster", instance=~"$instance", url=~"$url"}', legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}' )); @@ -210,7 +210,7 @@ local template = grafana.template; span=6, ) .addTarget(prometheus.target( - 'prometheus_remote_storage_shard_capacity{cluster=~"$cluster", instance=~"$instance"}', + 'prometheus_remote_storage_shard_capacity{cluster=~"$cluster", instance=~"$instance", url=~"$url"}', legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}' )); @@ -222,7 +222,7 @@ local template = grafana.template; span=6, ) .addTarget(prometheus.target( - 'prometheus_remote_storage_pending_samples{cluster=~"$cluster", instance=~"$instance"} or prometheus_remote_storage_samples_pending{cluster=~"$cluster", instance=~"$instance"}', + 'prometheus_remote_storage_pending_samples{cluster=~"$cluster", instance=~"$instance", url=~"$url"} or prometheus_remote_storage_samples_pending{cluster=~"$cluster", instance=~"$instance", url=~"$url"}', legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}' )); @@ -257,7 +257,7 @@ local template = grafana.template; span=3, ) .addTarget(prometheus.target( - 'rate(prometheus_remote_storage_dropped_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~"$cluster", instance=~"$instance"}[5m])', + 'rate(prometheus_remote_storage_dropped_samples_total{cluster=~"$cluster", instance=~"$instance", url=~"$url"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~"$cluster", instance=~"$instance", url=~"$url"}[5m])', legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}' )); @@ -268,7 +268,7 @@ local template = grafana.template; 
span=3, ) .addTarget(prometheus.target( - 'rate(prometheus_remote_storage_failed_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~"$cluster", instance=~"$instance"}[5m])', + 'rate(prometheus_remote_storage_failed_samples_total{cluster=~"$cluster", instance=~"$instance", url=~"$url"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~"$cluster", instance=~"$instance", url=~"$url"}[5m])', legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}' )); @@ -279,7 +279,7 @@ local template = grafana.template; span=3, ) .addTarget(prometheus.target( - 'rate(prometheus_remote_storage_retried_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~"$cluster", instance=~"$instance"}[5m])', + 'rate(prometheus_remote_storage_retried_samples_total{cluster=~"$cluster", instance=~"$instance", url=~"$url"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~"$cluster", instance=~"$instance", url=~"$url"}[5m])', legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}' )); @@ -290,7 +290,7 @@ local template = grafana.template; span=3, ) .addTarget(prometheus.target( - 'rate(prometheus_remote_storage_enqueue_retries_total{cluster=~"$cluster", instance=~"$instance"}[5m])', + 'rate(prometheus_remote_storage_enqueue_retries_total{cluster=~"$cluster", instance=~"$instance", url=~"$url"}[5m])', legendFormat='{{cluster}}:{{instance}} {{remote_name}}:{{url}}' )); From 1155d736b6b88e718a7c2b19f274b9bc8ea6ff29 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Thu, 21 Sep 2023 12:30:08 +0200 Subject: [PATCH 02/66] Improve sensitivity of TestQuerierIndexQueriesRace Currently, the two goroutines race against each other and it's possible that the main test goroutine finishes way earlier than appendSeries has had a chance to run at all. 
I tested this change by breaking the code that X fixed and running the race test 100 times. Without the additional time.Sleep the test failed 11 times. With the sleep it failed 65 out of the 100 runs, which is still not ideal, but it's a step forward. Signed-off-by: Dimitar Dimitrov --- tsdb/querier_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index fc6c688010..b1a61d1d62 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -2225,6 +2225,7 @@ func TestQuerierIndexQueriesRace(t *testing.T) { for _, c := range testCases { c := c t.Run(fmt.Sprintf("%v", c.matchers), func(t *testing.T) { + t.Parallel() db := openTestDB(t, DefaultOptions(), nil) h := db.Head() t.Cleanup(func() { @@ -2244,6 +2245,9 @@ func TestQuerierIndexQueriesRace(t *testing.T) { values, _, err := q.LabelValues(ctx, "seq", c.matchers...) require.NoError(t, err) require.Emptyf(t, values, `label values for label "seq" should be empty`) + + // Sleep to give the appends some chance to run. + time.Sleep(time.Millisecond) } }) } @@ -2260,6 +2264,7 @@ func appendSeries(t *testing.T, ctx context.Context, wg *sync.WaitGroup, h *Head require.NoError(t, err) // Throttle down the appends to keep the test somewhat nimble. + // Otherwise, we end up appending thousands or millions of samples.
time.Sleep(time.Millisecond) } } From c5c5c569fa4fe77727f491909adf0943af656fed Mon Sep 17 00:00:00 2001 From: Linas Medziunas Date: Mon, 9 Oct 2023 09:40:59 +0300 Subject: [PATCH 03/66] Histogram performance: optimize floatBucketIterator Signed-off-by: Linas Medziunas --- model/histogram/float_histogram.go | 66 ++++++++++++++----------- model/histogram/float_histogram_test.go | 49 ++++++++++++++++++ model/histogram/generic.go | 22 +++++++-- model/histogram/histogram.go | 10 ++-- 4 files changed, 110 insertions(+), 37 deletions(-) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 41873278cb..a2c115e03a 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -434,25 +434,25 @@ func (h *FloatHistogram) DetectReset(previous *FloatHistogram) bool { } currIt := h.floatBucketIterator(true, h.ZeroThreshold, h.Schema) prevIt := previous.floatBucketIterator(true, h.ZeroThreshold, h.Schema) - if detectReset(currIt, prevIt) { + if detectReset(&currIt, &prevIt) { return true } currIt = h.floatBucketIterator(false, h.ZeroThreshold, h.Schema) prevIt = previous.floatBucketIterator(false, h.ZeroThreshold, h.Schema) - return detectReset(currIt, prevIt) + return detectReset(&currIt, &prevIt) } -func detectReset(currIt, prevIt BucketIterator[float64]) bool { +func detectReset(currIt, prevIt *floatBucketIterator) bool { if !prevIt.Next() { return false // If no buckets in previous histogram, nothing can be reset. } - prevBucket := prevIt.At() + prevBucket := prevIt.strippedAt() if !currIt.Next() { // No bucket in current, but at least one in previous // histogram. Check if any of those are non-zero, in which case // this is a reset. 
for { - if prevBucket.Count != 0 { + if prevBucket.count != 0 { return true } if !prevIt.Next() { @@ -460,10 +460,10 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool { } } } - currBucket := currIt.At() + currBucket := currIt.strippedAt() for { // Forward currIt until we find the bucket corresponding to prevBucket. - for currBucket.Index < prevBucket.Index { + for currBucket.index < prevBucket.index { if !currIt.Next() { // Reached end of currIt early, therefore // previous histogram has a bucket that the @@ -471,7 +471,7 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool { // remaining buckets in the previous histogram // are unpopulated, this is a reset. for { - if prevBucket.Count != 0 { + if prevBucket.count != 0 { return true } if !prevIt.Next() { @@ -479,18 +479,18 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool { } } } - currBucket = currIt.At() + currBucket = currIt.strippedAt() } - if currBucket.Index > prevBucket.Index { + if currBucket.index > prevBucket.index { // Previous histogram has a bucket the current one does // not have. If it's populated, it's a reset. - if prevBucket.Count != 0 { + if prevBucket.count != 0 { return true } } else { // We have reached corresponding buckets in both iterators. // We can finally compare the counts. - if currBucket.Count < prevBucket.Count { + if currBucket.count < prevBucket.count { return true } } @@ -498,35 +498,39 @@ func detectReset(currIt, prevIt BucketIterator[float64]) bool { // Reached end of prevIt without finding offending buckets. return false } - prevBucket = prevIt.At() + prevBucket = prevIt.strippedAt() } } // PositiveBucketIterator returns a BucketIterator to iterate over all positive // buckets in ascending order (starting next to the zero bucket and going up). 
func (h *FloatHistogram) PositiveBucketIterator() BucketIterator[float64] { - return h.floatBucketIterator(true, 0, h.Schema) + it := h.floatBucketIterator(true, 0, h.Schema) + return &it } // NegativeBucketIterator returns a BucketIterator to iterate over all negative // buckets in descending order (starting next to the zero bucket and going // down). func (h *FloatHistogram) NegativeBucketIterator() BucketIterator[float64] { - return h.floatBucketIterator(false, 0, h.Schema) + it := h.floatBucketIterator(false, 0, h.Schema) + return &it } // PositiveReverseBucketIterator returns a BucketIterator to iterate over all // positive buckets in descending order (starting at the highest bucket and // going down towards the zero bucket). func (h *FloatHistogram) PositiveReverseBucketIterator() BucketIterator[float64] { - return newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true) + it := newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true) + return &it } // NegativeReverseBucketIterator returns a BucketIterator to iterate over all // negative buckets in ascending order (starting at the lowest bucket and going // up towards the zero bucket). 
func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64] { - return newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false) + it := newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false) + return &it } // AllBucketIterator returns a BucketIterator to iterate over all negative, @@ -537,8 +541,8 @@ func (h *FloatHistogram) NegativeReverseBucketIterator() BucketIterator[float64] func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] { return &allFloatBucketIterator{ h: h, - leftIter: h.NegativeReverseBucketIterator(), - rightIter: h.PositiveBucketIterator(), + leftIter: newReverseFloatBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false), + rightIter: h.floatBucketIterator(true, 0, h.Schema), state: -1, } } @@ -551,8 +555,8 @@ func (h *FloatHistogram) AllBucketIterator() BucketIterator[float64] { func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] { return &allFloatBucketIterator{ h: h, - leftIter: h.PositiveReverseBucketIterator(), - rightIter: h.NegativeBucketIterator(), + leftIter: newReverseFloatBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true), + rightIter: h.floatBucketIterator(false, 0, h.Schema), state: -1, } } @@ -683,11 +687,11 @@ func (h *FloatHistogram) reconcileZeroBuckets(other *FloatHistogram) float64 { // targetSchema prior to iterating (without mutating FloatHistogram). 
func (h *FloatHistogram) floatBucketIterator( positive bool, absoluteStartValue float64, targetSchema int32, -) *floatBucketIterator { +) floatBucketIterator { if targetSchema > h.Schema { panic(fmt.Errorf("cannot merge from schema %d to %d", h.Schema, targetSchema)) } - i := &floatBucketIterator{ + i := floatBucketIterator{ baseBucketIterator: baseBucketIterator[float64, float64]{ schema: h.Schema, positive: positive, @@ -705,11 +709,11 @@ func (h *FloatHistogram) floatBucketIterator( return i } -// reverseFloatbucketiterator is a low-level constructor for reverse bucket iterators. +// reverseFloatBucketIterator is a low-level constructor for reverse bucket iterators. func newReverseFloatBucketIterator( spans []Span, buckets []float64, schema int32, positive bool, -) *reverseFloatBucketIterator { - r := &reverseFloatBucketIterator{ +) reverseFloatBucketIterator { + r := reverseFloatBucketIterator{ baseBucketIterator: baseBucketIterator[float64, float64]{ schema: schema, spans: spans, @@ -737,6 +741,8 @@ type floatBucketIterator struct { targetSchema int32 // targetSchema is the schema to merge to and must be ≤ schema. origIdx int32 // The bucket index within the original schema. absoluteStartValue float64 // Never return buckets with an upper bound ≤ this value. + + boundReachedStartValue bool // Has getBound reached absoluteStartValue already? } func (i *floatBucketIterator) At() Bucket[float64] { @@ -800,9 +806,10 @@ mergeLoop: // Merge together all buckets from the original schema that fall into } // Skip buckets before absoluteStartValue. // TODO(beorn7): Maybe do something more efficient than this recursive call. 
- if getBound(i.currIdx, i.targetSchema) <= i.absoluteStartValue { + if !i.boundReachedStartValue && getBound(i.currIdx, i.targetSchema) <= i.absoluteStartValue { return i.Next() } + i.boundReachedStartValue = true return true } @@ -843,8 +850,9 @@ func (i *reverseFloatBucketIterator) Next() bool { } type allFloatBucketIterator struct { - h *FloatHistogram - leftIter, rightIter BucketIterator[float64] + h *FloatHistogram + leftIter reverseFloatBucketIterator + rightIter floatBucketIterator // -1 means we are iterating negative buckets. // 0 means it is time for the zero bucket. // 1 means we are iterating positive buckets. diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index 0b712be438..0a51b7616c 100644 --- a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -16,6 +16,7 @@ package histogram import ( "fmt" "math" + "math/rand" "testing" "github.com/stretchr/testify/require" @@ -2291,3 +2292,51 @@ func TestFloatBucketIteratorTargetSchema(t *testing.T) { } require.False(t, it.Next(), "negative iterator not exhausted") } + +func BenchmarkFloatHistogramAllBucketIterator(b *testing.B) { + rng := rand.New(rand.NewSource(0)) + + fh := createRandomFloatHistogram(rng, 50) + + b.ReportAllocs() // the current implementation reports 1 alloc + b.ResetTimer() + + for n := 0; n < b.N; n++ { + for it := fh.AllBucketIterator(); it.Next(); { + } + } +} + +func BenchmarkFloatHistogramDetectReset(b *testing.B) { + rng := rand.New(rand.NewSource(0)) + + fh := createRandomFloatHistogram(rng, 50) + + b.ReportAllocs() // the current implementation reports 0 allocs + b.ResetTimer() + + for n := 0; n < b.N; n++ { + // Detect against itself (no resets is the worst case input).
+ fh.DetectReset(fh) + } +} + +func createRandomFloatHistogram(rng *rand.Rand, spanNum int32) *FloatHistogram { + f := &FloatHistogram{} + f.PositiveSpans, f.PositiveBuckets = createRandomSpans(rng, spanNum) + f.NegativeSpans, f.NegativeBuckets = createRandomSpans(rng, spanNum) + return f +} + +func createRandomSpans(rng *rand.Rand, spanNum int32) ([]Span, []float64) { + Spans := make([]Span, spanNum) + Buckets := make([]float64, 0) + for i := 0; i < int(spanNum); i++ { + Spans[i].Offset = rng.Int31n(spanNum) + 1 + Spans[i].Length = uint32(rng.Int31n(spanNum) + 1) + for j := 0; j < int(Spans[i].Length); j++ { + Buckets = append(Buckets, float64(rng.Int31n(spanNum)+1)) + } + } + return Spans, Buckets +} diff --git a/model/histogram/generic.go b/model/histogram/generic.go index dad54cb069..f678f1ac93 100644 --- a/model/histogram/generic.go +++ b/model/histogram/generic.go @@ -53,6 +53,13 @@ type Bucket[BC BucketCount] struct { Index int32 } +// strippedBucket is Bucket without bound values (which are expensive to calculate +// and not used in certain use cases). +type strippedBucket[BC BucketCount] struct { + count BC + index int32 +} + // String returns a string representation of a Bucket, using the usual // mathematical notation of '['/']' for inclusive bounds and '('/')' for // non-inclusive bounds. @@ -101,13 +108,12 @@ type baseBucketIterator[BC BucketCount, IBC InternalBucketCount] struct { currIdx int32 // The actual bucket index. } -func (b baseBucketIterator[BC, IBC]) At() Bucket[BC] { +func (b *baseBucketIterator[BC, IBC]) At() Bucket[BC] { return b.at(b.schema) } -// at is an internal version of the exported At to enable using a different -// schema. -func (b baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] { +// at is an internal version of the exported At to enable using a different schema. 
+func (b *baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] { bucket := Bucket[BC]{ Count: BC(b.currCount), Index: b.currIdx, @@ -124,6 +130,14 @@ func (b baseBucketIterator[BC, IBC]) at(schema int32) Bucket[BC] { return bucket } +// strippedAt returns current strippedBucket (which lacks bucket bounds but is cheaper to compute). +func (b *baseBucketIterator[BC, IBC]) strippedAt() strippedBucket[BC] { + return strippedBucket[BC]{ + count: BC(b.currCount), + index: b.currIdx, + } +} + // compactBuckets is a generic function used by both Histogram.Compact and // FloatHistogram.Compact. Set deltaBuckets to true if the provided buckets are // deltas. Set it to false if the buckets contain absolute counts. diff --git a/model/histogram/histogram.go b/model/histogram/histogram.go index 6d425307c5..1116568276 100644 --- a/model/histogram/histogram.go +++ b/model/histogram/histogram.go @@ -148,13 +148,15 @@ func (h *Histogram) ZeroBucket() Bucket[uint64] { // PositiveBucketIterator returns a BucketIterator to iterate over all positive // buckets in ascending order (starting next to the zero bucket and going up). func (h *Histogram) PositiveBucketIterator() BucketIterator[uint64] { - return newRegularBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true) + it := newRegularBucketIterator(h.PositiveSpans, h.PositiveBuckets, h.Schema, true) + return &it } // NegativeBucketIterator returns a BucketIterator to iterate over all negative // buckets in descending order (starting next to the zero bucket and going down). 
func (h *Histogram) NegativeBucketIterator() BucketIterator[uint64] { - return newRegularBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false) + it := newRegularBucketIterator(h.NegativeSpans, h.NegativeBuckets, h.Schema, false) + return &it } // CumulativeBucketIterator returns a BucketIterator to iterate over a @@ -325,14 +327,14 @@ type regularBucketIterator struct { baseBucketIterator[uint64, int64] } -func newRegularBucketIterator(spans []Span, buckets []int64, schema int32, positive bool) *regularBucketIterator { +func newRegularBucketIterator(spans []Span, buckets []int64, schema int32, positive bool) regularBucketIterator { i := baseBucketIterator[uint64, int64]{ schema: schema, spans: spans, buckets: buckets, positive: positive, } - return ®ularBucketIterator{i} + return regularBucketIterator{i} } func (r *regularBucketIterator) Next() bool { From b9f75ceeddb18f9a375f743c98996c2e9154ad2b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 23:32:04 +0000 Subject: [PATCH 04/66] build(deps): bump github/codeql-action from 2.21.9 to 2.22.5 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.21.9 to 2.22.5. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/ddccb873888234080b77e9bc2d4764d5ccaaccf9...74483a38d39275f33fcff5f35b679b5ca4a26a99) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/codeql-analysis.yml | 6 +++--- .github/workflows/scorecards.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d79f2b9d44..d98233757d 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -30,12 +30,12 @@ jobs: go-version: '>=1.21 <1.22' - name: Initialize CodeQL - uses: github/codeql-action/init@ddccb873888234080b77e9bc2d4764d5ccaaccf9 # v2.21.9 + uses: github/codeql-action/init@74483a38d39275f33fcff5f35b679b5ca4a26a99 # v2.22.5 with: languages: ${{ matrix.language }} - name: Autobuild - uses: github/codeql-action/autobuild@ddccb873888234080b77e9bc2d4764d5ccaaccf9 # v2.21.9 + uses: github/codeql-action/autobuild@74483a38d39275f33fcff5f35b679b5ca4a26a99 # v2.22.5 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@ddccb873888234080b77e9bc2d4764d5ccaaccf9 # v2.21.9 + uses: github/codeql-action/analyze@74483a38d39275f33fcff5f35b679b5ca4a26a99 # v2.22.5 diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 6249ba6234..48278dae7d 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -45,6 +45,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@ddccb873888234080b77e9bc2d4764d5ccaaccf9 # tag=v2.21.9 + uses: github/codeql-action/upload-sarif@74483a38d39275f33fcff5f35b679b5ca4a26a99 # tag=v2.22.5 with: sarif_file: results.sarif From b4448e0ef252f27c436e9157c07e4d54021c3d72 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 23:32:08 +0000 Subject: [PATCH 05/66] build(deps): bump actions/checkout from 4.1.0 to 4.1.1 Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.0 to 4.1.1. 
- [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/8ade135a41bc03ea155e62e844d188df1ea18608...b4ffde65f46336ab88eb53be808477a3936bae11) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/buf-lint.yml | 2 +- .github/workflows/buf.yml | 2 +- .github/workflows/ci.yml | 22 +++++++++++----------- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/repo_sync.yml | 2 +- .github/workflows/scorecards.yml | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/buf-lint.yml b/.github/workflows/buf-lint.yml index b44ba05118..85109b39ab 100644 --- a/.github/workflows/buf-lint.yml +++ b/.github/workflows/buf-lint.yml @@ -12,7 +12,7 @@ jobs: name: lint runs-on: ubuntu-latest steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: bufbuild/buf-setup-action@eb60cd0de4f14f1f57cf346916b8cd69a9e7ed0b # v1.26.1 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml index 58c1bc1989..c2c9dc0706 100644 --- a/.github/workflows/buf.yml +++ b/.github/workflows/buf.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest if: github.repository_owner == 'prometheus' steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: bufbuild/buf-setup-action@eb60cd0de4f14f1f57cf346916b8cd69a9e7ed0b # v1.26.1 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 08fbb0a339..1f574ca6df 100644 --- a/.github/workflows/ci.yml +++ 
b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.21-base steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/setup_environment - run: make GO_ONLY=1 SKIP_GOLANGCI_LINT=1 @@ -35,7 +35,7 @@ jobs: image: quay.io/prometheus/golang-builder:1.21-base steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/setup_environment with: @@ -52,7 +52,7 @@ jobs: name: Go tests on Windows runs-on: windows-latest steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0 with: go-version: '>=1.21 <1.22' @@ -68,7 +68,7 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.20-base steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - run: make build - run: go test ./tsdb/... - run: go test ./tsdb/ -test.tsdb-isolation=false @@ -81,7 +81,7 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.20-base steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - run: go install ./cmd/promtool/. 
- run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest - run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest @@ -104,7 +104,7 @@ jobs: matrix: thread: [ 0, 1, 2 ] steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/build with: @@ -127,7 +127,7 @@ jobs: # Whenever the Go version is updated here, .promu.yml # should also be updated. steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/build with: @@ -138,7 +138,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: Install Go uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0 with: @@ -164,7 +164,7 @@ jobs: needs: [test_ui, test_go, test_windows, golangci, codeql, build_all] if: github.event_name == 'push' && github.event.ref == 'refs/heads/main' steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/publish_main with: @@ -178,7 +178,7 @@ jobs: needs: [test_ui, test_go, test_windows, golangci, codeql, build_all] if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.') steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: 
prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/publish_release with: @@ -193,7 +193,7 @@ jobs: needs: [test_ui, codeql] steps: - name: Checkout - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - name: Install nodejs uses: actions/setup-node@5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d # v3.8.1 diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d79f2b9d44..ead87d42f5 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -24,7 +24,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0 with: go-version: '>=1.21 <1.22' diff --git a/.github/workflows/repo_sync.yml b/.github/workflows/repo_sync.yml index 368b988288..1cf2eee242 100644 --- a/.github/workflows/repo_sync.yml +++ b/.github/workflows/repo_sync.yml @@ -13,7 +13,7 @@ jobs: container: image: quay.io/prometheus/golang-builder steps: - - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - run: ./scripts/sync_repo_files.sh env: GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }} diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 6249ba6234..d473339c5a 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -21,7 +21,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # tag=v4.1.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1 with: 
persist-credentials: false From 1c272a1f4fb5c78decbb1c2b2513594942a7f2a4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 23:32:14 +0000 Subject: [PATCH 06/66] build(deps): bump ossf/scorecard-action from 2.2.0 to 2.3.1 Bumps [ossf/scorecard-action](https://github.com/ossf/scorecard-action) from 2.2.0 to 2.3.1. - [Release notes](https://github.com/ossf/scorecard-action/releases) - [Changelog](https://github.com/ossf/scorecard-action/blob/main/RELEASE.md) - [Commits](https://github.com/ossf/scorecard-action/compare/08b4669551908b1024bb425080c797723083c031...0864cf19026789058feabb7e87baa5f140aac736) --- updated-dependencies: - dependency-name: ossf/scorecard-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 6249ba6234..1bc6a436fa 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -26,7 +26,7 @@ jobs: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@08b4669551908b1024bb425080c797723083c031 # tag=v2.2.0 + uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # tag=v2.3.1 with: results_file: results.sarif results_format: sarif From ae2b00d77d5b8c949318fcfde91355fb6de86f00 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 23:43:56 +0000 Subject: [PATCH 07/66] build(deps): bump the k8s-io group with 2 updates Bumps the k8s-io group with 2 updates: [k8s.io/api](https://github.com/kubernetes/api) and [k8s.io/client-go](https://github.com/kubernetes/client-go). 
Updates `k8s.io/api` from 0.28.2 to 0.28.3 - [Commits](https://github.com/kubernetes/api/compare/v0.28.2...v0.28.3) Updates `k8s.io/client-go` from 0.28.2 to 0.28.3 - [Changelog](https://github.com/kubernetes/client-go/blob/master/CHANGELOG.md) - [Commits](https://github.com/kubernetes/client-go/compare/v0.28.2...v0.28.3) --- updated-dependencies: - dependency-name: k8s.io/api dependency-type: direct:production update-type: version-update:semver-patch dependency-group: k8s-io - dependency-name: k8s.io/client-go dependency-type: direct:production update-type: version-update:semver-patch dependency-group: k8s-io ... Signed-off-by: dependabot[bot] --- go.mod | 6 +++--- go.sum | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index 7adaacfd76..00df80701d 100644 --- a/go.mod +++ b/go.mod @@ -81,9 +81,9 @@ require ( google.golang.org/protobuf v1.31.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 - k8s.io/api v0.28.2 - k8s.io/apimachinery v0.28.2 - k8s.io/client-go v0.28.2 + k8s.io/api v0.28.3 + k8s.io/apimachinery v0.28.3 + k8s.io/client-go v0.28.3 k8s.io/klog v1.0.0 k8s.io/klog/v2 v2.100.1 ) diff --git a/go.sum b/go.sum index 1a08b123cb..bba0f8f02c 100644 --- a/go.sum +++ b/go.sum @@ -1208,12 +1208,12 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -k8s.io/api v0.28.2 h1:9mpl5mOb6vXZvqbQmankOfPIGiudghwCoLl1EYfUZbw= -k8s.io/api v0.28.2/go.mod h1:RVnJBsjU8tcMq7C3iaRSGMeaKt2TWEUXcpIt/90fjEg= -k8s.io/apimachinery v0.28.2 h1:KCOJLrc6gu+wV1BYgwik4AF4vXOlVJPdiqn0yAWWwXQ= -k8s.io/apimachinery v0.28.2/go.mod h1:RdzF87y/ngqk9H4z3EL2Rppv5jj95vGS/HaFXrLDApU= -k8s.io/client-go v0.28.2 h1:DNoYI1vGq0slMBN/SWKMZMw0Rq+0EQW6/AK4v9+3VeY= 
-k8s.io/client-go v0.28.2/go.mod h1:sMkApowspLuc7omj1FOSUxSoqjr+d5Q0Yc0LOFnYFJY= +k8s.io/api v0.28.3 h1:Gj1HtbSdB4P08C8rs9AR94MfSGpRhJgsS+GF9V26xMM= +k8s.io/api v0.28.3/go.mod h1:MRCV/jr1dW87/qJnZ57U5Pak65LGmQVkKTzf3AtKFHc= +k8s.io/apimachinery v0.28.3 h1:B1wYx8txOaCQG0HmYF6nbpU8dg6HvA06x5tEffvOe7A= +k8s.io/apimachinery v0.28.3/go.mod h1:uQTKmIqs+rAYaq+DFaoD2X7pcjLOqbQX2AOiO0nIpb8= +k8s.io/client-go v0.28.3 h1:2OqNb72ZuTZPKCl+4gTKvqao0AMOl9f3o2ijbAj3LI4= +k8s.io/client-go v0.28.3/go.mod h1:LTykbBp9gsA7SwqirlCXBWtK0guzfhpoW4qSm7i9dxo= k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 h1:LyMgNKD2P8Wn1iAwQU5OhxCKlKJy0sHc+PcDwFB24dQ= k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9/go.mod h1:wZK2AVp1uHCp4VamDVgBP2COHZjqD1T68Rf0CM3YjSM= k8s.io/utils v0.0.0-20230711102312-30195339c3c7 h1:ZgnF1KZsYxWIifwSNZFZgNtWE89WI5yiP5WwlfDoIyc= From 2d6f27f10a6e44ddcc110043f17e89182a92458a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Nov 2023 23:55:51 +0000 Subject: [PATCH 08/66] build(deps): bump actions/checkout from 4.1.0 to 4.1.1 in /scripts Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.0 to 4.1.1. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/8ade135a41bc03ea155e62e844d188df1ea18608...b4ffde65f46336ab88eb53be808477a3936bae11) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- scripts/golangci-lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index 15cf547be1..babd8a0c46 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: install Go uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 with: From 8274e248addacbca809f78012d333adba8c1a8dd Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 2 Nov 2023 15:29:09 +1100 Subject: [PATCH 09/66] Fix issue where `concatenatingChunkIterator` can obscure errors. Signed-off-by: Charles Korn --- storage/merge.go | 3 +++ storage/merge_test.go | 59 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/storage/merge.go b/storage/merge.go index 50ae88ce09..501e8db09d 100644 --- a/storage/merge.go +++ b/storage/merge.go @@ -860,6 +860,9 @@ func (c *concatenatingChunkIterator) Next() bool { c.curr = c.iterators[c.idx].At() return true } + if c.iterators[c.idx].Err() != nil { + return false + } c.idx++ return c.Next() } diff --git a/storage/merge_test.go b/storage/merge_test.go index f68261d276..25c8fa4a8e 100644 --- a/storage/merge_test.go +++ b/storage/merge_test.go @@ -853,6 +853,65 @@ func TestConcatenatingChunkSeriesMerger(t *testing.T) { } } +func TestConcatenatingChunkIterator(t *testing.T) { + chunk1, err := chunks.ChunkFromSamples([]chunks.Sample{fSample{t: 1, f: 10}}) + require.NoError(t, err) + chunk2, err := chunks.ChunkFromSamples([]chunks.Sample{fSample{t: 2, f: 20}}) + require.NoError(t, err) + chunk3, err := chunks.ChunkFromSamples([]chunks.Sample{fSample{t: 3, f: 30}}) + require.NoError(t, err) + + testError := errors.New("something went wrong") + + testCases := 
map[string]struct { + iterators []chunks.Iterator + expectedChunks []chunks.Meta + expectedError error + }{ + "many successful iterators": { + iterators: []chunks.Iterator{ + NewListChunkSeriesIterator(chunk1, chunk2), + NewListChunkSeriesIterator(chunk3), + }, + expectedChunks: []chunks.Meta{chunk1, chunk2, chunk3}, + }, + "single failing iterator": { + iterators: []chunks.Iterator{ + errChunksIterator{err: testError}, + }, + expectedError: testError, + }, + "some failing and some successful iterators": { + iterators: []chunks.Iterator{ + NewListChunkSeriesIterator(chunk1, chunk2), + errChunksIterator{err: testError}, + NewListChunkSeriesIterator(chunk3), + }, + expectedChunks: []chunks.Meta{chunk1, chunk2}, // Should stop before advancing to last iterator. + expectedError: testError, + }, + } + + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + it := concatenatingChunkIterator{iterators: testCase.iterators} + var chks []chunks.Meta + + for it.Next() { + chks = append(chks, it.At()) + } + + require.Equal(t, testCase.expectedChunks, chks) + + if testCase.expectedError == nil { + require.NoError(t, it.Err()) + } else { + require.EqualError(t, it.Err(), testCase.expectedError.Error()) + } + }) + } +} + type mockQuerier struct { LabelQuerier From ea27db7389fa1bfad1b0e369cfa2053173414bb9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 2 Nov 2023 18:04:56 +0000 Subject: [PATCH 10/66] build(deps): bump github.com/linode/linodego from 1.23.0 to 1.24.0 Bumps [github.com/linode/linodego](https://github.com/linode/linodego) from 1.23.0 to 1.24.0. - [Release notes](https://github.com/linode/linodego/releases) - [Commits](https://github.com/linode/linodego/compare/v1.23.0...v1.24.0) --- updated-dependencies: - dependency-name: github.com/linode/linodego dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- go.mod | 4 ++-- go.sum | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/go.mod b/go.mod index 7d9ed9c1f1..c1feeff22f 100644 --- a/go.mod +++ b/go.mod @@ -36,7 +36,7 @@ require ( github.com/json-iterator/go v1.1.12 github.com/klauspost/compress v1.17.1 github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b - github.com/linode/linodego v1.23.0 + github.com/linode/linodego v1.24.0 github.com/miekg/dns v1.1.56 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f @@ -135,7 +135,7 @@ require ( github.com/go-openapi/spec v0.20.9 // indirect github.com/go-openapi/swag v0.22.4 // indirect github.com/go-openapi/validate v0.22.1 // indirect - github.com/go-resty/resty/v2 v2.7.0 // indirect + github.com/go-resty/resty/v2 v2.10.0 // indirect github.com/golang/glog v1.1.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.3 // indirect diff --git a/go.sum b/go.sum index 265ed3572f..c6293e2b31 100644 --- a/go.sum +++ b/go.sum @@ -236,8 +236,8 @@ github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogB github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= github.com/go-openapi/validate v0.22.1 h1:G+c2ub6q47kfX1sOBLwIQwzBVt8qmOAARyo/9Fqs9NU= github.com/go-openapi/validate v0.22.1/go.mod h1:rjnrwK57VJ7A8xqfpAOEKRH8yQSGUriMu5/zuPSQ1hg= -github.com/go-resty/resty/v2 v2.7.0 h1:me+K9p3uhSmXtrBZ4k9jcEAfJmuC8IivWHwaLZwPrFY= -github.com/go-resty/resty/v2 v2.7.0/go.mod h1:9PWDzw47qPphMRFfhsyk0NnSgvluHcljSMVIq3w7q0I= +github.com/go-resty/resty/v2 v2.10.0 h1:Qla4W/+TMmv0fOeeRqzEpXPLfTUnR5HZ1+lGs+CkiCo= +github.com/go-resty/resty/v2 v2.10.0/go.mod h1:iiP/OpA0CkcL3IGt1O0+/SIItFUbkkyw5BGXiVdTu+A= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-stack/stack 
v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= @@ -498,8 +498,8 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= -github.com/linode/linodego v1.23.0 h1:s0ReCZtuN9Z1IoUN9w1RLeYO1dMZUGPwOQ/IBFsBHtU= -github.com/linode/linodego v1.23.0/go.mod h1:0U7wj/UQOqBNbKv1FYTXiBUXueR8DY4HvIotwE0ENgg= +github.com/linode/linodego v1.24.0 h1:zO+bMdTE6wPccqP7QIkbxAfACX7DjSX6DW9JE/qOKDQ= +github.com/linode/linodego v1.24.0/go.mod h1:cq/ty5BCEQnsO6OjMqD7Q03KCCyB8CNM5E3MNg0LV6M= github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= @@ -846,6 +846,7 @@ golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -888,10 +889,11 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20211029224645-99673261e6eb/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -915,6 +917,7 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys 
v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -982,12 +985,16 @@ golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1001,6 +1008,7 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod 
h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1062,6 +1070,7 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From 9d9c5983215ab183bf4ddcfe0ab7f8b2c3710c3f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 2 Nov 2023 20:09:48 +0000 Subject: [PATCH 11/66] build(deps): bump github.com/klauspost/compress from 1.17.1 to 1.17.2 Bumps [github.com/klauspost/compress](https://github.com/klauspost/compress) from 1.17.1 to 1.17.2. - [Release notes](https://github.com/klauspost/compress/releases) - [Changelog](https://github.com/klauspost/compress/blob/master/.goreleaser.yml) - [Commits](https://github.com/klauspost/compress/compare/v1.17.1...v1.17.2) --- updated-dependencies: - dependency-name: github.com/klauspost/compress dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index c1feeff22f..ccadd3caec 100644 --- a/go.mod +++ b/go.mod @@ -34,7 +34,7 @@ require ( github.com/hetznercloud/hcloud-go/v2 v2.4.0 github.com/ionos-cloud/sdk-go/v6 v6.1.9 github.com/json-iterator/go v1.1.12 - github.com/klauspost/compress v1.17.1 + github.com/klauspost/compress v1.17.2 github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b github.com/linode/linodego v1.24.0 github.com/miekg/dns v1.1.56 diff --git a/go.sum b/go.sum index c6293e2b31..3e49020b5c 100644 --- a/go.sum +++ b/go.sum @@ -479,8 +479,8 @@ github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvW github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.17.1 h1:NE3C767s2ak2bweCZo3+rdP4U/HoyVXLv/X9f2gPS5g= -github.com/klauspost/compress v1.17.1/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4= +github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b h1:udzkj9S/zlT5X367kqJis0QP7YMxobob6zhzq6Yre00= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b/go.mod h1:pcaDhQK0/NJZEvtCO0qQPPropqV0sJOJ6YW7X+9kRwM= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= From fe057fc60d09fa49f70898e4c954566b570ecc5d Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Thu, 2 Nov 2023 21:45:07 +0100 Subject: [PATCH 12/66] use Go standard errors package Signed-off-by: Matthieu MOREL --- .golangci.yml | 11 +++++++++-- discovery/ionos/ionos.go | 2 +- 
model/textparse/protobufparse.go | 16 ++++++++-------- scrape/manager.go | 2 +- scrape/target.go | 18 +++++++++--------- storage/fanout_test.go | 2 +- util/annotations/annotations.go | 4 ++-- 7 files changed, 31 insertions(+), 24 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 871d748c75..2d53106c01 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -37,12 +37,17 @@ issues: - path: tsdb/ linters: - errorlint - - path: util/ + - path: tsdb/ + text: "import 'github.com/pkg/errors' is not allowed" linters: - - errorlint + - depguard - path: web/ linters: - errorlint + - path: web/ + text: "import 'github.com/pkg/errors' is not allowed" + linters: + - depguard - linters: - godot source: "^// ===" @@ -62,6 +67,8 @@ linters-settings: desc: "Use corresponding 'os' or 'io' functions instead." - pkg: "regexp" desc: "Use github.com/grafana/regexp instead of regexp" + - pkg: "github.com/pkg/errors" + desc: "Use 'errors' or 'fmt' instead of github.com/pkg/errors" errcheck: exclude-functions: # Don't flag lines such as "io.Copy(io.Discard, resp.Body)". 
diff --git a/discovery/ionos/ionos.go b/discovery/ionos/ionos.go index a13a000585..3afed8d799 100644 --- a/discovery/ionos/ionos.go +++ b/discovery/ionos/ionos.go @@ -14,10 +14,10 @@ package ionos import ( + "errors" "time" "github.com/go-kit/log" - "github.com/pkg/errors" "github.com/prometheus/common/config" "github.com/prometheus/common/model" diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index d6d87ee368..9a6dd6f6dc 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -16,6 +16,7 @@ package textparse import ( "bytes" "encoding/binary" + "errors" "fmt" "io" "math" @@ -24,7 +25,6 @@ import ( "github.com/gogo/protobuf/proto" "github.com/gogo/protobuf/types" - "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/exemplar" @@ -396,10 +396,10 @@ func (p *ProtobufParser) Next() (Entry, error) { // into metricBytes and validate only name, help, and type for now. name := p.mf.GetName() if !model.IsValidMetricName(model.LabelValue(name)) { - return EntryInvalid, errors.Errorf("invalid metric name: %s", name) + return EntryInvalid, fmt.Errorf("invalid metric name: %s", name) } if help := p.mf.GetHelp(); !utf8.ValidString(help) { - return EntryInvalid, errors.Errorf("invalid help for metric %q: %s", name, help) + return EntryInvalid, fmt.Errorf("invalid help for metric %q: %s", name, help) } switch p.mf.GetType() { case dto.MetricType_COUNTER, @@ -410,7 +410,7 @@ func (p *ProtobufParser) Next() (Entry, error) { dto.MetricType_UNTYPED: // All good. 
default: - return EntryInvalid, errors.Errorf("unknown metric type for metric %q: %s", name, p.mf.GetType()) + return EntryInvalid, fmt.Errorf("unknown metric type for metric %q: %s", name, p.mf.GetType()) } p.metricBytes.Reset() p.metricBytes.WriteString(name) @@ -463,7 +463,7 @@ func (p *ProtobufParser) Next() (Entry, error) { return EntryInvalid, err } default: - return EntryInvalid, errors.Errorf("invalid protobuf parsing state: %d", p.state) + return EntryInvalid, fmt.Errorf("invalid protobuf parsing state: %d", p.state) } return p.state, nil } @@ -476,13 +476,13 @@ func (p *ProtobufParser) updateMetricBytes() error { b.WriteByte(model.SeparatorByte) n := lp.GetName() if !model.LabelName(n).IsValid() { - return errors.Errorf("invalid label name: %s", n) + return fmt.Errorf("invalid label name: %s", n) } b.WriteString(n) b.WriteByte(model.SeparatorByte) v := lp.GetValue() if !utf8.ValidString(v) { - return errors.Errorf("invalid label value: %s", v) + return fmt.Errorf("invalid label value: %s", v) } b.WriteString(v) } @@ -557,7 +557,7 @@ func readDelimited(b []byte, mf *dto.MetricFamily) (n int, err error) { } totalLength := varIntLength + int(messageLength) if totalLength > len(b) { - return 0, errors.Errorf("protobufparse: insufficient length of buffer, expected at least %d bytes, got %d bytes", totalLength, len(b)) + return 0, fmt.Errorf("protobufparse: insufficient length of buffer, expected at least %d bytes, got %d bytes", totalLength, len(b)) } mf.Reset() return totalLength, mf.Unmarshal(b[varIntLength:totalLength]) diff --git a/scrape/manager.go b/scrape/manager.go index 69bd4bc42b..a0ac38f6ba 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -14,6 +14,7 @@ package scrape import ( + "errors" "fmt" "hash/fnv" "reflect" @@ -22,7 +23,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" 
"github.com/prometheus/common/model" diff --git a/scrape/target.go b/scrape/target.go index ad39b6bb26..8cc8597a4e 100644 --- a/scrape/target.go +++ b/scrape/target.go @@ -14,6 +14,7 @@ package scrape import ( + "errors" "fmt" "hash/fnv" "net" @@ -22,7 +23,6 @@ import ( "sync" "time" - "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" @@ -289,12 +289,12 @@ func (t *Target) intervalAndTimeout(defaultInterval, defaultDuration time.Durati intervalLabel := t.labels.Get(model.ScrapeIntervalLabel) interval, err := model.ParseDuration(intervalLabel) if err != nil { - return defaultInterval, defaultDuration, errors.Errorf("Error parsing interval label %q: %v", intervalLabel, err) + return defaultInterval, defaultDuration, fmt.Errorf("Error parsing interval label %q: %w", intervalLabel, err) } timeoutLabel := t.labels.Get(model.ScrapeTimeoutLabel) timeout, err := model.ParseDuration(timeoutLabel) if err != nil { - return defaultInterval, defaultDuration, errors.Errorf("Error parsing timeout label %q: %v", timeoutLabel, err) + return defaultInterval, defaultDuration, fmt.Errorf("Error parsing timeout label %q: %w", timeoutLabel, err) } return time.Duration(interval), time.Duration(timeout), nil @@ -444,7 +444,7 @@ func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort case "https": addr += ":443" default: - return labels.EmptyLabels(), labels.EmptyLabels(), errors.Errorf("invalid scheme: %q", cfg.Scheme) + return labels.EmptyLabels(), labels.EmptyLabels(), fmt.Errorf("invalid scheme: %q", cfg.Scheme) } lb.Set(model.AddressLabel, addr) } @@ -471,7 +471,7 @@ func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort interval := lb.Get(model.ScrapeIntervalLabel) intervalDuration, err := model.ParseDuration(interval) if err != nil { - return labels.EmptyLabels(), labels.EmptyLabels(), errors.Errorf("error parsing scrape interval: %v", err) + return labels.EmptyLabels(), 
labels.EmptyLabels(), fmt.Errorf("error parsing scrape interval: %w", err) } if time.Duration(intervalDuration) == 0 { return labels.EmptyLabels(), labels.EmptyLabels(), errors.New("scrape interval cannot be 0") @@ -480,14 +480,14 @@ func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort timeout := lb.Get(model.ScrapeTimeoutLabel) timeoutDuration, err := model.ParseDuration(timeout) if err != nil { - return labels.EmptyLabels(), labels.EmptyLabels(), errors.Errorf("error parsing scrape timeout: %v", err) + return labels.EmptyLabels(), labels.EmptyLabels(), fmt.Errorf("error parsing scrape timeout: %w", err) } if time.Duration(timeoutDuration) == 0 { return labels.EmptyLabels(), labels.EmptyLabels(), errors.New("scrape timeout cannot be 0") } if timeoutDuration > intervalDuration { - return labels.EmptyLabels(), labels.EmptyLabels(), errors.Errorf("scrape timeout cannot be greater than scrape interval (%q > %q)", timeout, interval) + return labels.EmptyLabels(), labels.EmptyLabels(), fmt.Errorf("scrape timeout cannot be greater than scrape interval (%q > %q)", timeout, interval) } // Meta labels are deleted after relabelling. Other internal labels propagate to @@ -507,7 +507,7 @@ func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort err = res.Validate(func(l labels.Label) error { // Check label values are valid, drop the target if not. 
if !model.LabelValue(l.Value).IsValid() { - return errors.Errorf("invalid label value for %q: %q", l.Name, l.Value) + return fmt.Errorf("invalid label value for %q: %q", l.Name, l.Value) } return nil }) @@ -536,7 +536,7 @@ func TargetsFromGroup(tg *targetgroup.Group, cfg *config.ScrapeConfig, noDefault lset, origLabels, err := PopulateLabels(lb, cfg, noDefaultPort) if err != nil { - failures = append(failures, errors.Wrapf(err, "instance %d in group %s", i, tg)) + failures = append(failures, fmt.Errorf("instance %d in group %s: %w", i, tg, err)) } if !lset.IsEmpty() || !origLabels.IsEmpty() { targets = append(targets, NewTarget(lset, origLabels, cfg.Params)) diff --git a/storage/fanout_test.go b/storage/fanout_test.go index 0f9363d7a7..a99c2f803d 100644 --- a/storage/fanout_test.go +++ b/storage/fanout_test.go @@ -15,9 +15,9 @@ package storage_test import ( "context" + "errors" "testing" - "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" diff --git a/util/annotations/annotations.go b/util/annotations/annotations.go index 9d0b11a089..fa4983fc9f 100644 --- a/util/annotations/annotations.go +++ b/util/annotations/annotations.go @@ -81,8 +81,8 @@ func (a Annotations) AsStrings(query string, maxAnnos int) []string { if maxAnnos > 0 && len(arr) >= maxAnnos { break } - anErr, ok := err.(annoErr) - if ok { + var anErr annoErr + if errors.As(err, &anErr) { anErr.Query = query err = anErr } From 1cd6c1cde5a2e44dc5869016b294c7dccba269dc Mon Sep 17 00:00:00 2001 From: Linas Medziunas Date: Fri, 13 Oct 2023 10:58:26 +0300 Subject: [PATCH 13/66] ValidateHistogram: strict Count check in absence of NaNs Signed-off-by: Linas Medziunas --- promql/engine_test.go | 2 +- storage/interface.go | 1 + tsdb/db_test.go | 5 +++-- tsdb/head_append.go | 20 +++++++++++++++----- tsdb/head_test.go | 36 +++++++++++++++++++++--------------- 5 files changed, 41 insertions(+), 23 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go 
index baca992b86..7532a3294e 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -3399,7 +3399,7 @@ func TestNativeHistogram_HistogramStdDevVar(t *testing.T) { { name: "-50, -8, 0, 3, 8, 9, 100, +Inf", h: &histogram.Histogram{ - Count: 8, + Count: 7, ZeroCount: 1, Sum: math.Inf(1), Schema: 3, diff --git a/storage/interface.go b/storage/interface.go index 211bcbc414..4da152aa42 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -44,6 +44,7 @@ var ( ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0") ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled") ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets") + ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)") ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 70d1844d46..f021faba92 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -508,7 +508,7 @@ func TestAmendHistogramDatapointCausesError(t *testing.T) { h := histogram.Histogram{ Schema: 3, - Count: 61, + Count: 52, Sum: 2.7, ZeroThreshold: 0.1, ZeroCount: 42, @@ -6314,6 +6314,7 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) { t.Run("buckets disappearing", func(t *testing.T) { h.PositiveSpans[1].Length-- h.PositiveBuckets = h.PositiveBuckets[:len(h.PositiveBuckets)-1] + h.Count -= 3 appendHistogram(series1, 110, h, &exp1, histogram.CounterReset) testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) }) @@ 
-6533,7 +6534,7 @@ func TestNativeHistogramFlag(t *testing.T) { require.NoError(t, db.Close()) }) h := &histogram.Histogram{ - Count: 10, + Count: 9, ZeroCount: 4, ZeroThreshold: 0.001, Sum: 35.5, diff --git a/tsdb/head_append.go b/tsdb/head_append.go index d1f4d3035e..330caad784 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -659,11 +659,21 @@ func ValidateHistogram(h *histogram.Histogram) error { return errors.Wrap(err, "positive side") } - if c := nCount + pCount + h.ZeroCount; c > h.Count { - return errors.Wrap( - storage.ErrHistogramCountNotBigEnough, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", c, h.Count), - ) + sumOfBuckets := nCount + pCount + h.ZeroCount + if math.IsNaN(h.Sum) { + if sumOfBuckets > h.Count { + return errors.Wrap( + storage.ErrHistogramCountNotBigEnough, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } + } else { + if sumOfBuckets != h.Count { + return errors.Wrap( + storage.ErrHistogramCountMismatch, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } } return nil diff --git a/tsdb/head_test.go b/tsdb/head_test.go index edecf8dfe0..2feb745f1e 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -3419,7 +3419,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { hists = tsdbutil.GenerateTestHistograms(numHistograms) } for _, h := range hists { - h.Count *= 2 h.NegativeSpans = h.PositiveSpans h.NegativeBuckets = h.PositiveBuckets _, err := app.AppendHistogram(0, s1, ts, h, nil) @@ -3442,7 +3441,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { hists = tsdbutil.GenerateTestFloatHistograms(numHistograms) } for _, h := range hists { - h.Count *= 2 h.NegativeSpans = h.PositiveSpans h.NegativeBuckets = h.PositiveBuckets _, err := app.AppendHistogram(0, s1, ts, nil, h) @@ -3484,7 +3482,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { } for _, h := range hists { 
ts++ - h.Count *= 2 h.NegativeSpans = h.PositiveSpans h.NegativeBuckets = h.PositiveBuckets _, err := app.AppendHistogram(0, s2, ts, h, nil) @@ -3521,7 +3518,6 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { } for _, h := range hists { ts++ - h.Count *= 2 h.NegativeSpans = h.PositiveSpans h.NegativeBuckets = h.PositiveBuckets _, err := app.AppendHistogram(0, s2, ts, nil, h) @@ -4907,7 +4903,7 @@ func TestHistogramValidation(t *testing.T) { "valid histogram": { h: tsdbutil.GenerateTestHistograms(1)[0], }, - "valid histogram that has its Count (4) higher than the actual total of buckets (2 + 1)": { + "valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { // This case is possible if NaN values (which do not fall into any bucket) are observed. h: &histogram.Histogram{ ZeroCount: 2, @@ -4917,6 +4913,17 @@ func TestHistogramValidation(t *testing.T) { PositiveBuckets: []int64{1}, }, }, + "rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { + h: &histogram.Histogram{ + ZeroCount: 2, + Count: 4, + Sum: 333, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + }, + errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, "rejects histogram that has too few negative buckets": { h: &histogram.Histogram{ NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}}, @@ -4981,7 +4988,7 @@ func TestHistogramValidation(t *testing.T) { NegativeBuckets: []int64{1}, PositiveBuckets: []int64{1}, }, - errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should be at least the number of observations found in the buckets`, + errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal 
the number of observations found in the buckets (in absence of NaN)`, skipFloat: true, }, "rejects a histogram that doesn't count the zero bucket in its count": { @@ -4993,7 +5000,7 @@ func TestHistogramValidation(t *testing.T) { NegativeBuckets: []int64{1}, PositiveBuckets: []int64{1}, }, - errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should be at least the number of observations found in the buckets`, + errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, skipFloat: true, }, } @@ -5029,8 +5036,8 @@ func generateBigTestHistograms(numHistograms, numBuckets int) []*histogram.Histo numSpans := numBuckets / 10 bucketsPerSide := numBuckets / 2 spanLength := uint32(bucketsPerSide / numSpans) - // Given all bucket deltas are 1, sum numHistograms + 1. - observationCount := numBuckets / 2 * (1 + numBuckets) + // Given all bucket deltas are 1, sum bucketsPerSide + 1. 
+ observationCount := bucketsPerSide * (1 + bucketsPerSide) var histograms []*histogram.Histogram for i := 0; i < numHistograms; i++ { @@ -5491,14 +5498,13 @@ func TestCuttingNewHeadChunks(t *testing.T) { numSamples int numBytes int }{ - {30, 696}, - {30, 700}, - {30, 708}, - {30, 693}, + {40, 896}, + {40, 899}, + {40, 896}, + {30, 690}, {30, 691}, - {30, 692}, - {30, 695}, {30, 694}, + {30, 693}, }, }, "really large histograms": { From 1f8aea11d6f1a8b12a89011c1bb17c400454f0b4 Mon Sep 17 00:00:00 2001 From: Linas Medziunas Date: Fri, 13 Oct 2023 10:58:48 +0300 Subject: [PATCH 14/66] Move histogram validation code to model/histogram Signed-off-by: Linas Medziunas --- model/histogram/test_utils.go | 52 +++++++++ model/histogram/validate.go | 136 +++++++++++++++++++++++ model/histogram/validate_test.go | 175 +++++++++++++++++++++++++++++ storage/interface.go | 17 +-- tsdb/agent/db.go | 4 +- tsdb/head_append.go | 111 +------------------ tsdb/head_test.go | 183 +------------------------------ 7 files changed, 377 insertions(+), 301 deletions(-) create mode 100644 model/histogram/test_utils.go create mode 100644 model/histogram/validate.go create mode 100644 model/histogram/validate_test.go diff --git a/model/histogram/test_utils.go b/model/histogram/test_utils.go new file mode 100644 index 0000000000..9e9a711c29 --- /dev/null +++ b/model/histogram/test_utils.go @@ -0,0 +1,52 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package histogram + +// GenerateBigTestHistograms generates a slice of histograms with given number of buckets each. +func GenerateBigTestHistograms(numHistograms, numBuckets int) []*Histogram { + numSpans := numBuckets / 10 + bucketsPerSide := numBuckets / 2 + spanLength := uint32(bucketsPerSide / numSpans) + // Given all bucket deltas are 1, sum bucketsPerSide + 1. + observationCount := bucketsPerSide * (1 + bucketsPerSide) + + var histograms []*Histogram + for i := 0; i < numHistograms; i++ { + h := &Histogram{ + Count: uint64(i + observationCount), + ZeroCount: uint64(i), + ZeroThreshold: 1e-128, + Sum: 18.4 * float64(i+1), + Schema: 2, + NegativeSpans: make([]Span, numSpans), + PositiveSpans: make([]Span, numSpans), + NegativeBuckets: make([]int64, bucketsPerSide), + PositiveBuckets: make([]int64, bucketsPerSide), + } + + for j := 0; j < numSpans; j++ { + s := Span{Offset: 1, Length: spanLength} + h.NegativeSpans[j] = s + h.PositiveSpans[j] = s + } + + for j := 0; j < bucketsPerSide; j++ { + h.NegativeBuckets[j] = 1 + h.PositiveBuckets[j] = 1 + } + + histograms = append(histograms, h) + } + return histograms +} diff --git a/model/histogram/validate.go b/model/histogram/validate.go new file mode 100644 index 0000000000..41649b7981 --- /dev/null +++ b/model/histogram/validate.go @@ -0,0 +1,136 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package histogram + +import ( + "fmt" + "math" + + "github.com/pkg/errors" +) + +var ( + ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets") + ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)") + ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") + ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") + ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") +) + +func ValidateHistogram(h *Histogram) error { + if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { + return errors.Wrap(err, "negative side") + } + if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { + return errors.Wrap(err, "positive side") + } + var nCount, pCount uint64 + err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true) + if err != nil { + return errors.Wrap(err, "negative side") + } + err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true) + if err != nil { + return errors.Wrap(err, "positive side") + } + + sumOfBuckets := nCount + pCount + h.ZeroCount + if math.IsNaN(h.Sum) { + if sumOfBuckets > h.Count { + return errors.Wrap( + ErrHistogramCountNotBigEnough, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } + } else { + if sumOfBuckets != h.Count { + return errors.Wrap( + ErrHistogramCountMismatch, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } + } + + return nil +} + +func ValidateFloatHistogram(h *FloatHistogram) error { + if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { + return errors.Wrap(err, "negative side") + 
} + if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { + return errors.Wrap(err, "positive side") + } + var nCount, pCount float64 + err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false) + if err != nil { + return errors.Wrap(err, "negative side") + } + err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false) + if err != nil { + return errors.Wrap(err, "positive side") + } + + // We do not check for h.Count being at least as large as the sum of the + // counts in the buckets because floating point precision issues can + // create false positives here. + + return nil +} + +func checkHistogramSpans(spans []Span, numBuckets int) error { + var spanBuckets int + for n, span := range spans { + if n > 0 && span.Offset < 0 { + return errors.Wrap( + ErrHistogramSpanNegativeOffset, + fmt.Sprintf("span number %d with offset %d", n+1, span.Offset), + ) + } + spanBuckets += int(span.Length) + } + if spanBuckets != numBuckets { + return errors.Wrap( + ErrHistogramSpansBucketsMismatch, + fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets), + ) + } + return nil +} + +func checkHistogramBuckets[BC BucketCount, IBC InternalBucketCount](buckets []IBC, count *BC, deltas bool) error { + if len(buckets) == 0 { + return nil + } + + var last IBC + for i := 0; i < len(buckets); i++ { + var c IBC + if deltas { + c = last + buckets[i] + } else { + c = buckets[i] + } + if c < 0 { + return errors.Wrap( + ErrHistogramNegativeBucketCount, + fmt.Sprintf("bucket number %d has observation count of %v", i+1, c), + ) + } + last = c + *count += BC(c) + } + + return nil +} diff --git a/model/histogram/validate_test.go b/model/histogram/validate_test.go new file mode 100644 index 0000000000..d9d8f06399 --- /dev/null +++ b/model/histogram/validate_test.go @@ -0,0 +1,175 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package histogram + +import ( + "math" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestHistogramValidation(t *testing.T) { + tests := map[string]struct { + h *Histogram + errMsg string + skipFloat bool + }{ + "valid histogram": { + h: &Histogram{ + Count: 12, + ZeroCount: 2, + ZeroThreshold: 0.001, + Sum: 19.4, + Schema: 1, + PositiveSpans: []Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{1, 1, -1, 0}, + NegativeSpans: []Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + NegativeBuckets: []int64{1, 1, -1, 0}, + }, + }, + "valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { + // This case is possible if NaN values (which do not fall into any bucket) are observed. 
+ h: &Histogram{ + ZeroCount: 2, + Count: 4, + Sum: math.NaN(), + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + }, + }, + "rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { + h: &Histogram{ + ZeroCount: 2, + Count: 4, + Sum: 333, + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + }, + errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, + "rejects histogram that has too few negative buckets": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: 0, Length: 1}}, + NegativeBuckets: []int64{}, + }, + errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects histogram that has too few positive buckets": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{}, + }, + errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects histogram that has too many negative buckets": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: 0, Length: 1}}, + NegativeBuckets: []int64{1, 2}, + }, + errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects histogram that has too many positive buckets": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1, 2}, + }, + errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects a histogram that has a negative span with a negative offset": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, + 
NegativeBuckets: []int64{1, 2}, + }, + errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`, + }, + "rejects a histogram which has a positive span with a negative offset": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, + PositiveBuckets: []int64{1, 2}, + }, + errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`, + }, + "rejects a histogram that has a negative bucket with a negative count": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: -1, Length: 1}}, + NegativeBuckets: []int64{-1}, + }, + errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, + }, + "rejects a histogram that has a positive bucket with a negative count": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: -1, Length: 1}}, + PositiveBuckets: []int64{-1}, + }, + errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, + }, + "rejects a histogram that has a lower count than count in buckets": { + h: &Histogram{ + Count: 0, + NegativeSpans: []Span{{Offset: -1, Length: 1}}, + PositiveSpans: []Span{{Offset: -1, Length: 1}}, + NegativeBuckets: []int64{1}, + PositiveBuckets: []int64{1}, + }, + errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, + "rejects a histogram that doesn't count the zero bucket in its count": { + h: &Histogram{ + Count: 2, + ZeroCount: 1, + NegativeSpans: []Span{{Offset: -1, Length: 1}}, + PositiveSpans: []Span{{Offset: -1, Length: 1}}, + NegativeBuckets: []int64{1}, + PositiveBuckets: []int64{1}, + }, + errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations 
found in the buckets (in absence of NaN)`, + skipFloat: true, + }, + } + + for testName, tc := range tests { + t.Run(testName, func(t *testing.T) { + if err := ValidateHistogram(tc.h); tc.errMsg != "" { + require.EqualError(t, err, tc.errMsg) + } else { + require.NoError(t, err) + } + if tc.skipFloat { + return + } + if err := ValidateFloatHistogram(tc.h.ToFloat()); tc.errMsg != "" { + require.EqualError(t, err, tc.errMsg) + } else { + require.NoError(t, err) + } + }) + } +} + +func BenchmarkHistogramValidation(b *testing.B) { + histograms := GenerateBigTestHistograms(b.N, 500) + b.ResetTimer() + for _, h := range histograms { + require.NoError(b, ValidateHistogram(h)) + } +} diff --git a/storage/interface.go b/storage/interface.go index 4da152aa42..2b1b6a63eb 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -37,17 +37,12 @@ var ( // ErrTooOldSample is when out of order support is enabled but the sample is outside the time window allowed. ErrTooOldSample = errors.New("too old sample") // ErrDuplicateSampleForTimestamp is when the sample has same timestamp but different value. 
- ErrDuplicateSampleForTimestamp = errors.New("duplicate sample for timestamp") - ErrOutOfOrderExemplar = errors.New("out of order exemplar") - ErrDuplicateExemplar = errors.New("duplicate exemplar") - ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength) - ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0") - ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled") - ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets") - ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)") - ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") - ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") - ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") + ErrDuplicateSampleForTimestamp = errors.New("duplicate sample for timestamp") + ErrOutOfOrderExemplar = errors.New("out of order exemplar") + ErrDuplicateExemplar = errors.New("duplicate exemplar") + ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength) + ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0") + ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled") ) // SeriesRef is a generic series reference. 
In prometheus it is either a diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 3912b9d52f..188b10585a 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -883,13 +883,13 @@ func (a *appender) AppendExemplar(ref storage.SeriesRef, _ labels.Labels, e exem func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { if h != nil { - if err := tsdb.ValidateHistogram(h); err != nil { + if err := histogram.ValidateHistogram(h); err != nil { return 0, err } } if fh != nil { - if err := tsdb.ValidateFloatHistogram(fh); err != nil { + if err := histogram.ValidateFloatHistogram(fh); err != nil { return 0, err } } diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 330caad784..eeaaa369f3 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -521,13 +521,13 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels } if h != nil { - if err := ValidateHistogram(h); err != nil { + if err := histogram.ValidateHistogram(h); err != nil { return 0, err } } if fh != nil { - if err := ValidateFloatHistogram(fh); err != nil { + if err := histogram.ValidateFloatHistogram(fh); err != nil { return 0, err } } @@ -642,113 +642,6 @@ func (a *headAppender) UpdateMetadata(ref storage.SeriesRef, lset labels.Labels, return ref, nil } -func ValidateHistogram(h *histogram.Histogram) error { - if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { - return errors.Wrap(err, "negative side") - } - if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { - return errors.Wrap(err, "positive side") - } - var nCount, pCount uint64 - err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true) - if err != nil { - return errors.Wrap(err, "negative side") - } - err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true) - if err != nil { - return errors.Wrap(err, "positive side") - } - - 
sumOfBuckets := nCount + pCount + h.ZeroCount - if math.IsNaN(h.Sum) { - if sumOfBuckets > h.Count { - return errors.Wrap( - storage.ErrHistogramCountNotBigEnough, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), - ) - } - } else { - if sumOfBuckets != h.Count { - return errors.Wrap( - storage.ErrHistogramCountMismatch, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), - ) - } - } - - return nil -} - -func ValidateFloatHistogram(h *histogram.FloatHistogram) error { - if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { - return errors.Wrap(err, "negative side") - } - if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { - return errors.Wrap(err, "positive side") - } - var nCount, pCount float64 - err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false) - if err != nil { - return errors.Wrap(err, "negative side") - } - err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false) - if err != nil { - return errors.Wrap(err, "positive side") - } - - // We do not check for h.Count being at least as large as the sum of the - // counts in the buckets because floating point precision issues can - // create false positives here. 
- - return nil -} - -func checkHistogramSpans(spans []histogram.Span, numBuckets int) error { - var spanBuckets int - for n, span := range spans { - if n > 0 && span.Offset < 0 { - return errors.Wrap( - storage.ErrHistogramSpanNegativeOffset, - fmt.Sprintf("span number %d with offset %d", n+1, span.Offset), - ) - } - spanBuckets += int(span.Length) - } - if spanBuckets != numBuckets { - return errors.Wrap( - storage.ErrHistogramSpansBucketsMismatch, - fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets), - ) - } - return nil -} - -func checkHistogramBuckets[BC histogram.BucketCount, IBC histogram.InternalBucketCount](buckets []IBC, count *BC, deltas bool) error { - if len(buckets) == 0 { - return nil - } - - var last IBC - for i := 0; i < len(buckets); i++ { - var c IBC - if deltas { - c = last + buckets[i] - } else { - c = buckets[i] - } - if c < 0 { - return errors.Wrap( - storage.ErrHistogramNegativeBucketCount, - fmt.Sprintf("bucket number %d has observation count of %v", i+1, c), - ) - } - last = c - *count += BC(c) - } - - return nil -} - var _ storage.GetRef = &headAppender{} func (a *headAppender) GetRef(lset labels.Labels, hash uint64) (storage.SeriesRef, labels.Labels) { diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 2feb745f1e..1216dd0a69 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -4894,181 +4894,6 @@ func TestReplayAfterMmapReplayError(t *testing.T) { require.NoError(t, h.Close()) } -func TestHistogramValidation(t *testing.T) { - tests := map[string]struct { - h *histogram.Histogram - errMsg string - skipFloat bool - }{ - "valid histogram": { - h: tsdbutil.GenerateTestHistograms(1)[0], - }, - "valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { - // This case is possible if NaN values (which do not fall into any bucket) are observed. 
- h: &histogram.Histogram{ - ZeroCount: 2, - Count: 4, - Sum: math.NaN(), - PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{1}, - }, - }, - "rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { - h: &histogram.Histogram{ - ZeroCount: 2, - Count: 4, - Sum: 333, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{1}, - }, - errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, - skipFloat: true, - }, - "rejects histogram that has too few negative buckets": { - h: &histogram.Histogram{ - NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}}, - NegativeBuckets: []int64{}, - }, - errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects histogram that has too few positive buckets": { - h: &histogram.Histogram{ - PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{}, - }, - errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects histogram that has too many negative buckets": { - h: &histogram.Histogram{ - NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}}, - NegativeBuckets: []int64{1, 2}, - }, - errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects histogram that has too many positive buckets": { - h: &histogram.Histogram{ - PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{1, 2}, - }, - errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects a histogram that has a negative span with a 
negative offset": { - h: &histogram.Histogram{ - NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, - NegativeBuckets: []int64{1, 2}, - }, - errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`, - }, - "rejects a histogram which has a positive span with a negative offset": { - h: &histogram.Histogram{ - PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, - PositiveBuckets: []int64{1, 2}, - }, - errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`, - }, - "rejects a histogram that has a negative bucket with a negative count": { - h: &histogram.Histogram{ - NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}}, - NegativeBuckets: []int64{-1}, - }, - errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, - }, - "rejects a histogram that has a positive bucket with a negative count": { - h: &histogram.Histogram{ - PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}}, - PositiveBuckets: []int64{-1}, - }, - errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, - }, - "rejects a histogram that has a lower count than count in buckets": { - h: &histogram.Histogram{ - Count: 0, - NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}}, - PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}}, - NegativeBuckets: []int64{1}, - PositiveBuckets: []int64{1}, - }, - errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, - skipFloat: true, - }, - "rejects a histogram that doesn't count the zero bucket in its count": { - h: &histogram.Histogram{ - Count: 2, - ZeroCount: 1, - NegativeSpans: []histogram.Span{{Offset: -1, Length: 1}}, - 
PositiveSpans: []histogram.Span{{Offset: -1, Length: 1}}, - NegativeBuckets: []int64{1}, - PositiveBuckets: []int64{1}, - }, - errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, - skipFloat: true, - }, - } - - for testName, tc := range tests { - t.Run(testName, func(t *testing.T) { - if err := ValidateHistogram(tc.h); tc.errMsg != "" { - require.EqualError(t, err, tc.errMsg) - } else { - require.NoError(t, err) - } - if tc.skipFloat { - return - } - if err := ValidateFloatHistogram(tc.h.ToFloat()); tc.errMsg != "" { - require.EqualError(t, err, tc.errMsg) - } else { - require.NoError(t, err) - } - }) - } -} - -func BenchmarkHistogramValidation(b *testing.B) { - histograms := generateBigTestHistograms(b.N, 500) - b.ResetTimer() - for _, h := range histograms { - require.NoError(b, ValidateHistogram(h)) - } -} - -func generateBigTestHistograms(numHistograms, numBuckets int) []*histogram.Histogram { - numSpans := numBuckets / 10 - bucketsPerSide := numBuckets / 2 - spanLength := uint32(bucketsPerSide / numSpans) - // Given all bucket deltas are 1, sum bucketsPerSide + 1. 
- observationCount := bucketsPerSide * (1 + bucketsPerSide) - - var histograms []*histogram.Histogram - for i := 0; i < numHistograms; i++ { - h := &histogram.Histogram{ - Count: uint64(i + observationCount), - ZeroCount: uint64(i), - ZeroThreshold: 1e-128, - Sum: 18.4 * float64(i+1), - Schema: 2, - NegativeSpans: make([]histogram.Span, numSpans), - PositiveSpans: make([]histogram.Span, numSpans), - NegativeBuckets: make([]int64, bucketsPerSide), - PositiveBuckets: make([]int64, bucketsPerSide), - } - - for j := 0; j < numSpans; j++ { - s := histogram.Span{Offset: 1, Length: spanLength} - h.NegativeSpans[j] = s - h.PositiveSpans[j] = s - } - - for j := 0; j < bucketsPerSide; j++ { - h.NegativeBuckets[j] = 1 - h.PositiveBuckets[j] = 1 - } - - histograms = append(histograms, h) - } - return histograms -} - func TestOOOAppendWithNoSeries(t *testing.T) { dir := t.TempDir() wal, err := wlog.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, wlog.CompressionSnappy) @@ -5409,7 +5234,7 @@ func BenchmarkCuttingHeadHistogramChunks(b *testing.B) { numSamples = 50000 numBuckets = 100 ) - samples := generateBigTestHistograms(numSamples, numBuckets) + samples := histogram.GenerateBigTestHistograms(numSamples, numBuckets) h, _ := newTestHead(b, DefaultBlockDuration, wlog.CompressionNone, false) defer func() { @@ -5473,7 +5298,7 @@ func TestCuttingNewHeadChunks(t *testing.T) { "small histograms": { numTotalSamples: 240, histValFunc: func() func(i int) *histogram.Histogram { - hists := generateBigTestHistograms(240, 10) + hists := histogram.GenerateBigTestHistograms(240, 10) return func(i int) *histogram.Histogram { return hists[i] } @@ -5489,7 +5314,7 @@ func TestCuttingNewHeadChunks(t *testing.T) { "large histograms": { numTotalSamples: 240, histValFunc: func() func(i int) *histogram.Histogram { - hists := generateBigTestHistograms(240, 100) + hists := histogram.GenerateBigTestHistograms(240, 100) return func(i int) *histogram.Histogram { return hists[i] } @@ -5512,7 +5337,7 @@ 
func TestCuttingNewHeadChunks(t *testing.T) { // per chunk. numTotalSamples: 11, histValFunc: func() func(i int) *histogram.Histogram { - hists := generateBigTestHistograms(11, 100000) + hists := histogram.GenerateBigTestHistograms(11, 100000) return func(i int) *histogram.Histogram { return hists[i] } From ebed7d0612dca43e3ef89f00c3882b58463d21ab Mon Sep 17 00:00:00 2001 From: Linas Medziunas Date: Fri, 3 Nov 2023 16:47:59 +0200 Subject: [PATCH 15/66] Change Validate to be a method on histogram structs Signed-off-by: Linas Medziunas --- model/histogram/float_histogram.go | 27 +++++ model/histogram/generic.go | 56 +++++++++ model/histogram/histogram.go | 45 ++++++++ model/histogram/histogram_test.go | 156 +++++++++++++++++++++++++ model/histogram/validate.go | 136 ---------------------- model/histogram/validate_test.go | 175 ----------------------------- tsdb/agent/db.go | 4 +- tsdb/head_append.go | 4 +- 8 files changed, 288 insertions(+), 315 deletions(-) delete mode 100644 model/histogram/validate.go delete mode 100644 model/histogram/validate_test.go diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index b519cbc58f..fd6c2560f1 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -17,6 +17,8 @@ import ( "fmt" "math" "strings" + + "github.com/pkg/errors" ) // FloatHistogram is similar to Histogram but uses float64 for all @@ -593,6 +595,31 @@ func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] { } } +// Validate validates consistency between span and bucket slices. Also, buckets are checked +// against negative values. +// We do not check for h.Count being at least as large as the sum of the +// counts in the buckets because floating point precision issues can +// create false positives here. 
+func (h *FloatHistogram) Validate() error { + if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { + return errors.Wrap(err, "negative side") + } + if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { + return errors.Wrap(err, "positive side") + } + var nCount, pCount float64 + err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false) + if err != nil { + return errors.Wrap(err, "negative side") + } + err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false) + if err != nil { + return errors.Wrap(err, "positive side") + } + + return nil +} + // zeroCountForLargerThreshold returns what the histogram's zero count would be // if the ZeroThreshold had the provided larger (or equal) value. If the // provided value is less than the histogram's ZeroThreshold, the method panics. diff --git a/model/histogram/generic.go b/model/histogram/generic.go index 3c1ad7cc89..22048c44ef 100644 --- a/model/histogram/generic.go +++ b/model/histogram/generic.go @@ -17,6 +17,16 @@ import ( "fmt" "math" "strings" + + "github.com/pkg/errors" +) + +var ( + ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets") + ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)") + ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") + ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") + ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") ) // BucketCount is a type constraint for the count in a bucket, which can be @@ -347,6 +357,52 @@ func compactBuckets[IBC InternalBucketCount](buckets []IBC, spans []Span, maxEmp return buckets, spans } +func checkHistogramSpans(spans []Span, numBuckets int) 
error { + var spanBuckets int + for n, span := range spans { + if n > 0 && span.Offset < 0 { + return errors.Wrap( + ErrHistogramSpanNegativeOffset, + fmt.Sprintf("span number %d with offset %d", n+1, span.Offset), + ) + } + spanBuckets += int(span.Length) + } + if spanBuckets != numBuckets { + return errors.Wrap( + ErrHistogramSpansBucketsMismatch, + fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets), + ) + } + return nil +} + +func checkHistogramBuckets[BC BucketCount, IBC InternalBucketCount](buckets []IBC, count *BC, deltas bool) error { + if len(buckets) == 0 { + return nil + } + + var last IBC + for i := 0; i < len(buckets); i++ { + var c IBC + if deltas { + c = last + buckets[i] + } else { + c = buckets[i] + } + if c < 0 { + return errors.Wrap( + ErrHistogramNegativeBucketCount, + fmt.Sprintf("bucket number %d has observation count of %v", i+1, c), + ) + } + last = c + *count += BC(c) + } + + return nil +} + func getBound(idx, schema int32) float64 { // Here a bit of context about the behavior for the last bucket counting // regular numbers (called simply "last bucket" below) and the bucket diff --git a/model/histogram/histogram.go b/model/histogram/histogram.go index 762e7de816..fa624841e3 100644 --- a/model/histogram/histogram.go +++ b/model/histogram/histogram.go @@ -18,6 +18,7 @@ import ( "math" "strings" + "github.com/pkg/errors" "golang.org/x/exp/slices" ) @@ -328,6 +329,50 @@ func (h *Histogram) ToFloat() *FloatHistogram { } } +// Validate validates consistency between span and bucket slices. Also, buckets are checked +// against negative values. +// For histograms that have not observed any NaN values (based on IsNaN(h.Sum) check), a +// strict h.Count = nCount + pCount + h.ZeroCount check is performed. +// Otherwise, only a lower bound check will be done (h.Count >= nCount + pCount + h.ZeroCount), +// because NaN observations do not increment the values of buckets (but they do increment +// the total h.Count). 
+func (h *Histogram) Validate() error { + if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { + return errors.Wrap(err, "negative side") + } + if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { + return errors.Wrap(err, "positive side") + } + var nCount, pCount uint64 + err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true) + if err != nil { + return errors.Wrap(err, "negative side") + } + err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true) + if err != nil { + return errors.Wrap(err, "positive side") + } + + sumOfBuckets := nCount + pCount + h.ZeroCount + if math.IsNaN(h.Sum) { + if sumOfBuckets > h.Count { + return errors.Wrap( + ErrHistogramCountNotBigEnough, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } + } else { + if sumOfBuckets != h.Count { + return errors.Wrap( + ErrHistogramCountMismatch, + fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), + ) + } + } + + return nil +} + type regularBucketIterator struct { baseBucketIterator[uint64, int64] } diff --git a/model/histogram/histogram_test.go b/model/histogram/histogram_test.go index 23fb1779ea..6f12f53e82 100644 --- a/model/histogram/histogram_test.go +++ b/model/histogram/histogram_test.go @@ -811,3 +811,159 @@ func TestHistogramCompact(t *testing.T) { }) } } + +func TestHistogramValidation(t *testing.T) { + tests := map[string]struct { + h *Histogram + errMsg string + skipFloat bool + }{ + "valid histogram": { + h: &Histogram{ + Count: 12, + ZeroCount: 2, + ZeroThreshold: 0.001, + Sum: 19.4, + Schema: 1, + PositiveSpans: []Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{1, 1, -1, 0}, + NegativeSpans: []Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + NegativeBuckets: []int64{1, 1, -1, 0}, + }, + }, + "valid histogram with NaN observations that has its Count 
(4) higher than the actual total of buckets (2 + 1)": { + // This case is possible if NaN values (which do not fall into any bucket) are observed. + h: &Histogram{ + ZeroCount: 2, + Count: 4, + Sum: math.NaN(), + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + }, + }, + "rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { + h: &Histogram{ + ZeroCount: 2, + Count: 4, + Sum: 333, + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + }, + errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, + "rejects histogram that has too few negative buckets": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: 0, Length: 1}}, + NegativeBuckets: []int64{}, + }, + errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects histogram that has too few positive buckets": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{}, + }, + errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects histogram that has too many negative buckets": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: 0, Length: 1}}, + NegativeBuckets: []int64{1, 2}, + }, + errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects histogram that has too many positive buckets": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1, 2}, + }, + errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, + }, + "rejects a histogram that 
has a negative span with a negative offset": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, + NegativeBuckets: []int64{1, 2}, + }, + errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`, + }, + "rejects a histogram which has a positive span with a negative offset": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, + PositiveBuckets: []int64{1, 2}, + }, + errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`, + }, + "rejects a histogram that has a negative bucket with a negative count": { + h: &Histogram{ + NegativeSpans: []Span{{Offset: -1, Length: 1}}, + NegativeBuckets: []int64{-1}, + }, + errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, + }, + "rejects a histogram that has a positive bucket with a negative count": { + h: &Histogram{ + PositiveSpans: []Span{{Offset: -1, Length: 1}}, + PositiveBuckets: []int64{-1}, + }, + errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, + }, + "rejects a histogram that has a lower count than count in buckets": { + h: &Histogram{ + Count: 0, + NegativeSpans: []Span{{Offset: -1, Length: 1}}, + PositiveSpans: []Span{{Offset: -1, Length: 1}}, + NegativeBuckets: []int64{1}, + PositiveBuckets: []int64{1}, + }, + errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, + "rejects a histogram that doesn't count the zero bucket in its count": { + h: &Histogram{ + Count: 2, + ZeroCount: 1, + NegativeSpans: []Span{{Offset: -1, Length: 1}}, + PositiveSpans: []Span{{Offset: -1, Length: 1}}, + NegativeBuckets: []int64{1}, + PositiveBuckets: []int64{1}, + }, 
+ errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, + skipFloat: true, + }, + } + + for testName, tc := range tests { + t.Run(testName, func(t *testing.T) { + if err := tc.h.Validate(); tc.errMsg != "" { + require.EqualError(t, err, tc.errMsg) + } else { + require.NoError(t, err) + } + if tc.skipFloat { + return + } + + fh := tc.h.ToFloat() + if err := fh.Validate(); tc.errMsg != "" { + require.EqualError(t, err, tc.errMsg) + } else { + require.NoError(t, err) + } + }) + } +} + +func BenchmarkHistogramValidation(b *testing.B) { + histograms := GenerateBigTestHistograms(b.N, 500) + b.ResetTimer() + for _, h := range histograms { + require.NoError(b, h.Validate()) + } +} diff --git a/model/histogram/validate.go b/model/histogram/validate.go deleted file mode 100644 index 41649b7981..0000000000 --- a/model/histogram/validate.go +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2023 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package histogram - -import ( - "fmt" - "math" - - "github.com/pkg/errors" -) - -var ( - ErrHistogramCountNotBigEnough = errors.New("histogram's observation count should be at least the number of observations found in the buckets") - ErrHistogramCountMismatch = errors.New("histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)") - ErrHistogramNegativeBucketCount = errors.New("histogram has a bucket whose observation count is negative") - ErrHistogramSpanNegativeOffset = errors.New("histogram has a span whose offset is negative") - ErrHistogramSpansBucketsMismatch = errors.New("histogram spans specify different number of buckets than provided") -) - -func ValidateHistogram(h *Histogram) error { - if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { - return errors.Wrap(err, "negative side") - } - if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { - return errors.Wrap(err, "positive side") - } - var nCount, pCount uint64 - err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true) - if err != nil { - return errors.Wrap(err, "negative side") - } - err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true) - if err != nil { - return errors.Wrap(err, "positive side") - } - - sumOfBuckets := nCount + pCount + h.ZeroCount - if math.IsNaN(h.Sum) { - if sumOfBuckets > h.Count { - return errors.Wrap( - ErrHistogramCountNotBigEnough, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), - ) - } - } else { - if sumOfBuckets != h.Count { - return errors.Wrap( - ErrHistogramCountMismatch, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), - ) - } - } - - return nil -} - -func ValidateFloatHistogram(h *FloatHistogram) error { - if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { - return errors.Wrap(err, "negative side") - 
} - if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { - return errors.Wrap(err, "positive side") - } - var nCount, pCount float64 - err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false) - if err != nil { - return errors.Wrap(err, "negative side") - } - err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false) - if err != nil { - return errors.Wrap(err, "positive side") - } - - // We do not check for h.Count being at least as large as the sum of the - // counts in the buckets because floating point precision issues can - // create false positives here. - - return nil -} - -func checkHistogramSpans(spans []Span, numBuckets int) error { - var spanBuckets int - for n, span := range spans { - if n > 0 && span.Offset < 0 { - return errors.Wrap( - ErrHistogramSpanNegativeOffset, - fmt.Sprintf("span number %d with offset %d", n+1, span.Offset), - ) - } - spanBuckets += int(span.Length) - } - if spanBuckets != numBuckets { - return errors.Wrap( - ErrHistogramSpansBucketsMismatch, - fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets), - ) - } - return nil -} - -func checkHistogramBuckets[BC BucketCount, IBC InternalBucketCount](buckets []IBC, count *BC, deltas bool) error { - if len(buckets) == 0 { - return nil - } - - var last IBC - for i := 0; i < len(buckets); i++ { - var c IBC - if deltas { - c = last + buckets[i] - } else { - c = buckets[i] - } - if c < 0 { - return errors.Wrap( - ErrHistogramNegativeBucketCount, - fmt.Sprintf("bucket number %d has observation count of %v", i+1, c), - ) - } - last = c - *count += BC(c) - } - - return nil -} diff --git a/model/histogram/validate_test.go b/model/histogram/validate_test.go deleted file mode 100644 index d9d8f06399..0000000000 --- a/model/histogram/validate_test.go +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright 2023 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in 
compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package histogram - -import ( - "math" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestHistogramValidation(t *testing.T) { - tests := map[string]struct { - h *Histogram - errMsg string - skipFloat bool - }{ - "valid histogram": { - h: &Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 19.4, - Schema: 1, - PositiveSpans: []Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{1, 1, -1, 0}, - NegativeSpans: []Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{1, 1, -1, 0}, - }, - }, - "valid histogram with NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { - // This case is possible if NaN values (which do not fall into any bucket) are observed. 
- h: &Histogram{ - ZeroCount: 2, - Count: 4, - Sum: math.NaN(), - PositiveSpans: []Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{1}, - }, - }, - "rejects histogram without NaN observations that has its Count (4) higher than the actual total of buckets (2 + 1)": { - h: &Histogram{ - ZeroCount: 2, - Count: 4, - Sum: 333, - PositiveSpans: []Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{1}, - }, - errMsg: `3 observations found in buckets, but the Count field is 4: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, - skipFloat: true, - }, - "rejects histogram that has too few negative buckets": { - h: &Histogram{ - NegativeSpans: []Span{{Offset: 0, Length: 1}}, - NegativeBuckets: []int64{}, - }, - errMsg: `negative side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects histogram that has too few positive buckets": { - h: &Histogram{ - PositiveSpans: []Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{}, - }, - errMsg: `positive side: spans need 1 buckets, have 0 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects histogram that has too many negative buckets": { - h: &Histogram{ - NegativeSpans: []Span{{Offset: 0, Length: 1}}, - NegativeBuckets: []int64{1, 2}, - }, - errMsg: `negative side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects histogram that has too many positive buckets": { - h: &Histogram{ - PositiveSpans: []Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{1, 2}, - }, - errMsg: `positive side: spans need 1 buckets, have 2 buckets: histogram spans specify different number of buckets than provided`, - }, - "rejects a histogram that has a negative span with a negative offset": { - h: &Histogram{ - NegativeSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, - 
NegativeBuckets: []int64{1, 2}, - }, - errMsg: `negative side: span number 2 with offset -1: histogram has a span whose offset is negative`, - }, - "rejects a histogram which has a positive span with a negative offset": { - h: &Histogram{ - PositiveSpans: []Span{{Offset: -1, Length: 1}, {Offset: -1, Length: 1}}, - PositiveBuckets: []int64{1, 2}, - }, - errMsg: `positive side: span number 2 with offset -1: histogram has a span whose offset is negative`, - }, - "rejects a histogram that has a negative bucket with a negative count": { - h: &Histogram{ - NegativeSpans: []Span{{Offset: -1, Length: 1}}, - NegativeBuckets: []int64{-1}, - }, - errMsg: `negative side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, - }, - "rejects a histogram that has a positive bucket with a negative count": { - h: &Histogram{ - PositiveSpans: []Span{{Offset: -1, Length: 1}}, - PositiveBuckets: []int64{-1}, - }, - errMsg: `positive side: bucket number 1 has observation count of -1: histogram has a bucket whose observation count is negative`, - }, - "rejects a histogram that has a lower count than count in buckets": { - h: &Histogram{ - Count: 0, - NegativeSpans: []Span{{Offset: -1, Length: 1}}, - PositiveSpans: []Span{{Offset: -1, Length: 1}}, - NegativeBuckets: []int64{1}, - PositiveBuckets: []int64{1}, - }, - errMsg: `2 observations found in buckets, but the Count field is 0: histogram's observation count should equal the number of observations found in the buckets (in absence of NaN)`, - skipFloat: true, - }, - "rejects a histogram that doesn't count the zero bucket in its count": { - h: &Histogram{ - Count: 2, - ZeroCount: 1, - NegativeSpans: []Span{{Offset: -1, Length: 1}}, - PositiveSpans: []Span{{Offset: -1, Length: 1}}, - NegativeBuckets: []int64{1}, - PositiveBuckets: []int64{1}, - }, - errMsg: `3 observations found in buckets, but the Count field is 2: histogram's observation count should equal the number of observations 
found in the buckets (in absence of NaN)`, - skipFloat: true, - }, - } - - for testName, tc := range tests { - t.Run(testName, func(t *testing.T) { - if err := ValidateHistogram(tc.h); tc.errMsg != "" { - require.EqualError(t, err, tc.errMsg) - } else { - require.NoError(t, err) - } - if tc.skipFloat { - return - } - if err := ValidateFloatHistogram(tc.h.ToFloat()); tc.errMsg != "" { - require.EqualError(t, err, tc.errMsg) - } else { - require.NoError(t, err) - } - }) - } -} - -func BenchmarkHistogramValidation(b *testing.B) { - histograms := GenerateBigTestHistograms(b.N, 500) - b.ResetTimer() - for _, h := range histograms { - require.NoError(b, ValidateHistogram(h)) - } -} diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 188b10585a..e4d44afa27 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -883,13 +883,13 @@ func (a *appender) AppendExemplar(ref storage.SeriesRef, _ labels.Labels, e exem func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { if h != nil { - if err := histogram.ValidateHistogram(h); err != nil { + if err := h.Validate(); err != nil { return 0, err } } if fh != nil { - if err := histogram.ValidateFloatHistogram(fh); err != nil { + if err := fh.Validate(); err != nil { return 0, err } } diff --git a/tsdb/head_append.go b/tsdb/head_append.go index eeaaa369f3..3663c800ae 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -521,13 +521,13 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels } if h != nil { - if err := histogram.ValidateHistogram(h); err != nil { + if err := h.Validate(); err != nil { return 0, err } } if fh != nil { - if err := histogram.ValidateFloatHistogram(fh); err != nil { + if err := fh.Validate(); err != nil { return 0, err } } From 222d46d24351cd15f64636970ae888501f879a77 Mon Sep 17 00:00:00 2001 From: Julien Pivotto Date: Fri, 3 Nov 2023 15:34:31 -0400 Subject: 
[PATCH 16/66] Linode: Add GPU label Signed-off-by: Julien Pivotto --- discovery/linode/linode.go | 2 ++ discovery/linode/linode_test.go | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go index 63213c87b2..a5e047b948 100644 --- a/discovery/linode/linode.go +++ b/discovery/linode/linode.go @@ -51,6 +51,7 @@ const ( linodeLabelStatus = linodeLabel + "status" linodeLabelTags = linodeLabel + "tags" linodeLabelGroup = linodeLabel + "group" + linodeLabelGPUs = linodeLabel + "gpus" linodeLabelHypervisor = linodeLabel + "hypervisor" linodeLabelBackups = linodeLabel + "backups" linodeLabelSpecsDiskBytes = linodeLabel + "specs_disk_bytes" @@ -302,6 +303,7 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro linodeLabelType: model.LabelValue(instance.Type), linodeLabelStatus: model.LabelValue(instance.Status), linodeLabelGroup: model.LabelValue(instance.Group), + linodeLabelGPUs: model.LabelValue(fmt.Sprintf("%d", instance.Specs.GPUs)), linodeLabelHypervisor: model.LabelValue(instance.Hypervisor), linodeLabelBackups: model.LabelValue(backupsStatus), linodeLabelSpecsDiskBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Disk)<<20)), diff --git a/discovery/linode/linode_test.go b/discovery/linode/linode_test.go index 67eb8198e8..988313b702 100644 --- a/discovery/linode/linode_test.go +++ b/discovery/linode/linode_test.go @@ -85,6 +85,7 @@ func TestLinodeSDRefresh(t *testing.T) { "__meta_linode_status": model.LabelValue("running"), "__meta_linode_tags": model.LabelValue(",monitoring,"), "__meta_linode_group": model.LabelValue(""), + "__meta_linode_gpus": model.LabelValue("0"), "__meta_linode_hypervisor": model.LabelValue("kvm"), "__meta_linode_backups": model.LabelValue("disabled"), "__meta_linode_specs_disk_bytes": model.LabelValue("85899345920"), @@ -109,6 +110,7 @@ func TestLinodeSDRefresh(t *testing.T) { "__meta_linode_status": model.LabelValue("running"), 
"__meta_linode_tags": model.LabelValue(",monitoring,"), "__meta_linode_group": model.LabelValue(""), + "__meta_linode_gpus": model.LabelValue("0"), "__meta_linode_hypervisor": model.LabelValue("kvm"), "__meta_linode_backups": model.LabelValue("disabled"), "__meta_linode_specs_disk_bytes": model.LabelValue("85899345920"), @@ -132,6 +134,7 @@ func TestLinodeSDRefresh(t *testing.T) { "__meta_linode_status": model.LabelValue("running"), "__meta_linode_tags": model.LabelValue(",monitoring,"), "__meta_linode_group": model.LabelValue(""), + "__meta_linode_gpus": model.LabelValue("0"), "__meta_linode_hypervisor": model.LabelValue("kvm"), "__meta_linode_backups": model.LabelValue("disabled"), "__meta_linode_specs_disk_bytes": model.LabelValue("53687091200"), @@ -155,6 +158,7 @@ func TestLinodeSDRefresh(t *testing.T) { "__meta_linode_status": model.LabelValue("running"), "__meta_linode_tags": model.LabelValue(",monitoring,"), "__meta_linode_group": model.LabelValue(""), + "__meta_linode_gpus": model.LabelValue("0"), "__meta_linode_hypervisor": model.LabelValue("kvm"), "__meta_linode_backups": model.LabelValue("disabled"), "__meta_linode_specs_disk_bytes": model.LabelValue("26843545600"), From 75b59e0f3d2361779b008ac0cebb3d47a9f324bd Mon Sep 17 00:00:00 2001 From: SuperQ Date: Tue, 5 Sep 2023 06:27:33 +0200 Subject: [PATCH 17/66] Update golangci-lint. Update golangci-lint for Go 1.21. * Use consistent go-version syntax. 
Signed-off-by: SuperQ --- .github/workflows/ci.yml | 4 ++-- .github/workflows/codeql-analysis.yml | 2 +- scripts/golangci-lint.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 08fbb0a339..8b5e0e1254 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 - uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0 with: - go-version: '>=1.21 <1.22' + go-version: 1.21.x - run: | $TestTargets = go list ./... | Where-Object { $_ -NotMatch "(github.com/prometheus/prometheus/discovery.*|github.com/prometheus/prometheus/config|github.com/prometheus/prometheus/web)"} go test $TestTargets -vet=off -v @@ -143,7 +143,7 @@ jobs: uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0 with: cache: false - go-version: 1.20.x + go-version: 1.21.x - name: Install snmp_exporter/generator dependencies run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d98233757d..b5ed4a0a39 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -27,7 +27,7 @@ jobs: uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 - uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0 with: - go-version: '>=1.21 <1.22' + go-version: 1.21.x - name: Initialize CodeQL uses: github/codeql-action/init@74483a38d39275f33fcff5f35b679b5ca4a26a99 # v2.22.5 diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index babd8a0c46..ffa6b3090e 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -22,7 +22,7 @@ jobs: - name: install Go uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 with: - go-version: 1.20.x 
+ go-version: 1.21.x - name: Install snmp_exporter/generator dependencies run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' From 05fba53e57cddbfaeb475e65089df922f868760f Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Wed, 8 Nov 2023 04:49:39 +0100 Subject: [PATCH 18/66] web : use Go standard package Signed-off-by: Matthieu MOREL --- .golangci.yml | 7 ---- model/histogram/float_histogram.go | 10 +++--- model/histogram/generic.go | 18 +++-------- model/histogram/histogram.go | 19 ++++------- web/api/v1/api.go | 52 ++++++++++++++---------------- web/api/v1/api_test.go | 6 ++-- web/api/v1/errors_test.go | 2 +- web/federate.go | 4 +-- web/federate_test.go | 10 +++--- web/web.go | 3 +- 10 files changed, 51 insertions(+), 80 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 2d53106c01..666d22cbe4 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -41,13 +41,6 @@ issues: text: "import 'github.com/pkg/errors' is not allowed" linters: - depguard - - path: web/ - linters: - - errorlint - - path: web/ - text: "import 'github.com/pkg/errors' is not allowed" - linters: - - depguard - linters: - godot source: "^// ===" diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index fd6c2560f1..22d33f5a4e 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -17,8 +17,6 @@ import ( "fmt" "math" "strings" - - "github.com/pkg/errors" ) // FloatHistogram is similar to Histogram but uses float64 for all @@ -602,19 +600,19 @@ func (h *FloatHistogram) AllReverseBucketIterator() BucketIterator[float64] { // create false positives here. 
func (h *FloatHistogram) Validate() error { if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { - return errors.Wrap(err, "negative side") + return fmt.Errorf("negative side: %w", err) } if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { - return errors.Wrap(err, "positive side") + return fmt.Errorf("positive side: %w", err) } var nCount, pCount float64 err := checkHistogramBuckets(h.NegativeBuckets, &nCount, false) if err != nil { - return errors.Wrap(err, "negative side") + return fmt.Errorf("negative side: %w", err) } err = checkHistogramBuckets(h.PositiveBuckets, &pCount, false) if err != nil { - return errors.Wrap(err, "positive side") + return fmt.Errorf("positive side: %w", err) } return nil diff --git a/model/histogram/generic.go b/model/histogram/generic.go index 22048c44ef..7e4eb1ecb1 100644 --- a/model/histogram/generic.go +++ b/model/histogram/generic.go @@ -14,11 +14,10 @@ package histogram import ( + "errors" "fmt" "math" "strings" - - "github.com/pkg/errors" ) var ( @@ -361,18 +360,12 @@ func checkHistogramSpans(spans []Span, numBuckets int) error { var spanBuckets int for n, span := range spans { if n > 0 && span.Offset < 0 { - return errors.Wrap( - ErrHistogramSpanNegativeOffset, - fmt.Sprintf("span number %d with offset %d", n+1, span.Offset), - ) + return fmt.Errorf("span number %d with offset %d: %w", n+1, span.Offset, ErrHistogramSpanNegativeOffset) } spanBuckets += int(span.Length) } if spanBuckets != numBuckets { - return errors.Wrap( - ErrHistogramSpansBucketsMismatch, - fmt.Sprintf("spans need %d buckets, have %d buckets", spanBuckets, numBuckets), - ) + return fmt.Errorf("spans need %d buckets, have %d buckets: %w", spanBuckets, numBuckets, ErrHistogramSpansBucketsMismatch) } return nil } @@ -391,10 +384,7 @@ func checkHistogramBuckets[BC BucketCount, IBC InternalBucketCount](buckets []IB c = buckets[i] } if c < 0 { - return errors.Wrap( - ErrHistogramNegativeBucketCount, - 
fmt.Sprintf("bucket number %d has observation count of %v", i+1, c), - ) + return fmt.Errorf("bucket number %d has observation count of %v: %w", i+1, c, ErrHistogramNegativeBucketCount) } last = c *count += BC(c) diff --git a/model/histogram/histogram.go b/model/histogram/histogram.go index fa624841e3..30c23e5e79 100644 --- a/model/histogram/histogram.go +++ b/model/histogram/histogram.go @@ -18,7 +18,6 @@ import ( "math" "strings" - "github.com/pkg/errors" "golang.org/x/exp/slices" ) @@ -338,35 +337,29 @@ func (h *Histogram) ToFloat() *FloatHistogram { // the total h.Count). func (h *Histogram) Validate() error { if err := checkHistogramSpans(h.NegativeSpans, len(h.NegativeBuckets)); err != nil { - return errors.Wrap(err, "negative side") + return fmt.Errorf("negative side: %w", err) } if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { - return errors.Wrap(err, "positive side") + return fmt.Errorf("positive side: %w", err) } var nCount, pCount uint64 err := checkHistogramBuckets(h.NegativeBuckets, &nCount, true) if err != nil { - return errors.Wrap(err, "negative side") + return fmt.Errorf("negative side: %w", err) } err = checkHistogramBuckets(h.PositiveBuckets, &pCount, true) if err != nil { - return errors.Wrap(err, "positive side") + return fmt.Errorf("positive side: %w", err) } sumOfBuckets := nCount + pCount + h.ZeroCount if math.IsNaN(h.Sum) { if sumOfBuckets > h.Count { - return errors.Wrap( - ErrHistogramCountNotBigEnough, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), - ) + return fmt.Errorf("%d observations found in buckets, but the Count field is %d: %w", sumOfBuckets, h.Count, ErrHistogramCountNotBigEnough) } } else { if sumOfBuckets != h.Count { - return errors.Wrap( - ErrHistogramCountMismatch, - fmt.Sprintf("%d observations found in buckets, but the Count field is %d", sumOfBuckets, h.Count), - ) + return fmt.Errorf("%d observations found in buckets, but the 
Count field is %d: %w", sumOfBuckets, h.Count, ErrHistogramCountMismatch) } } diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 6c8128f3f1..34abe80aac 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -15,6 +15,7 @@ package v1 import ( "context" + "errors" "fmt" "math" "math/rand" @@ -33,7 +34,6 @@ import ( "github.com/grafana/regexp" jsoniter "github.com/json-iterator/go" "github.com/munnerz/goautoneg" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/prometheus/common/route" @@ -317,7 +317,7 @@ func (api *API) ClearCodecs() { } func setUnavailStatusOnTSDBNotReady(r apiFuncResult) apiFuncResult { - if r.err != nil && errors.Cause(r.err.err) == tsdb.ErrNotReady { + if r.err != nil && errors.Is(r.err.err, tsdb.ErrNotReady) { r.err.typ = errorUnavailable } return r @@ -415,7 +415,7 @@ type QueryData struct { func invalidParamError(err error, parameter string) apiFuncResult { return apiFuncResult{nil, &apiError{ - errorBadData, errors.Wrapf(err, "invalid parameter %q", parameter), + errorBadData, fmt.Errorf("invalid parameter %q: %w", parameter, err), }, nil, nil} } @@ -624,17 +624,15 @@ func returnAPIError(err error) *apiError { return nil } - cause := errors.Unwrap(err) - if cause == nil { - cause = err - } - - switch cause.(type) { - case promql.ErrQueryCanceled: + var eqc promql.ErrQueryCanceled + var eqt promql.ErrQueryTimeout + var es promql.ErrStorage + switch { + case errors.As(err, &eqc): return &apiError{errorCanceled, err} - case promql.ErrQueryTimeout: + case errors.As(err, &eqt): return &apiError{errorTimeout, err} - case promql.ErrStorage: + case errors.As(err, &es): return &apiError{errorInternal, err} } @@ -709,7 +707,7 @@ func (api *API) labelValues(r *http.Request) (result apiFuncResult) { name := route.Param(ctx, "name") if !model.LabelNameRE.MatchString(name) { - return apiFuncResult{nil, &apiError{errorBadData, errors.Errorf("invalid label name: %q", name)}, 
nil, nil} + return apiFuncResult{nil, &apiError{errorBadData, fmt.Errorf("invalid label name: %q", name)}, nil, nil} } start, err := parseTimeParam(r, "start", MinTime) @@ -797,7 +795,7 @@ func (api *API) series(r *http.Request) (result apiFuncResult) { ctx := r.Context() if err := r.ParseForm(); err != nil { - return apiFuncResult{nil, &apiError{errorBadData, errors.Wrapf(err, "error parsing form values")}, nil, nil} + return apiFuncResult{nil, &apiError{errorBadData, fmt.Errorf("error parsing form values: %w", err)}, nil, nil} } if len(r.Form["match[]"]) == 0 { return apiFuncResult{nil, &apiError{errorBadData, errors.New("no match[] parameter provided")}, nil, nil} @@ -1028,7 +1026,7 @@ func (api *API) targets(r *http.Request) apiFuncResult { case err == nil && lastErrStr == "": return "" case err != nil: - return errors.Wrapf(err, lastErrStr).Error() + return fmt.Errorf("%s: %w", lastErrStr, err).Error() default: return lastErrStr } @@ -1347,7 +1345,7 @@ type RecordingRule struct { func (api *API) rules(r *http.Request) apiFuncResult { if err := r.ParseForm(); err != nil { - return apiFuncResult{nil, &apiError{errorBadData, errors.Wrapf(err, "error parsing form values")}, nil, nil} + return apiFuncResult{nil, &apiError{errorBadData, fmt.Errorf("error parsing form values: %w", err)}, nil, nil} } queryFormToSet := func(values []string) map[string]struct{} { @@ -1367,7 +1365,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { typ := strings.ToLower(r.URL.Query().Get("type")) if typ != "" && typ != "alert" && typ != "record" { - return invalidParamError(errors.Errorf("not supported value %q", typ), "type") + return invalidParamError(fmt.Errorf("not supported value %q", typ), "type") } returnAlerts := typ == "" || typ == "alert" @@ -1453,7 +1451,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { Type: "recording", } default: - err := errors.Errorf("failed to assert type of rule '%v'", rule.Name()) + err := fmt.Errorf("failed to assert type of rule 
'%v'", rule.Name()) return apiFuncResult{nil, &apiError{errorInternal, err}, nil, nil} } @@ -1560,7 +1558,7 @@ func (api *API) serveTSDBStatus(r *http.Request) apiFuncResult { } metrics, err := api.gatherer.Gather() if err != nil { - return apiFuncResult{nil, &apiError{errorInternal, fmt.Errorf("error gathering runtime status: %s", err)}, nil, nil} + return apiFuncResult{nil, &apiError{errorInternal, fmt.Errorf("error gathering runtime status: %w", err)}, nil, nil} } chunkCount := int64(math.NaN()) for _, mF := range metrics { @@ -1636,7 +1634,7 @@ func (api *API) deleteSeries(r *http.Request) apiFuncResult { return apiFuncResult{nil, &apiError{errorUnavailable, errors.New("admin APIs disabled")}, nil, nil} } if err := r.ParseForm(); err != nil { - return apiFuncResult{nil, &apiError{errorBadData, errors.Wrap(err, "error parsing form values")}, nil, nil} + return apiFuncResult{nil, &apiError{errorBadData, fmt.Errorf("error parsing form values: %w", err)}, nil, nil} } if len(r.Form["match[]"]) == 0 { return apiFuncResult{nil, &apiError{errorBadData, errors.New("no match[] parameter provided")}, nil, nil} @@ -1675,7 +1673,7 @@ func (api *API) snapshot(r *http.Request) apiFuncResult { if r.FormValue("skip_head") != "" { skipHead, err = strconv.ParseBool(r.FormValue("skip_head")) if err != nil { - return invalidParamError(errors.Wrapf(err, "unable to parse boolean"), "skip_head") + return invalidParamError(fmt.Errorf("unable to parse boolean: %w", err), "skip_head") } } @@ -1687,10 +1685,10 @@ func (api *API) snapshot(r *http.Request) apiFuncResult { dir = filepath.Join(snapdir, name) ) if err := os.MkdirAll(dir, 0o777); err != nil { - return apiFuncResult{nil, &apiError{errorInternal, errors.Wrap(err, "create snapshot directory")}, nil, nil} + return apiFuncResult{nil, &apiError{errorInternal, fmt.Errorf("create snapshot directory: %w", err)}, nil, nil} } if err := api.db.Snapshot(dir, !skipHead); err != nil { - return apiFuncResult{nil, &apiError{errorInternal, 
errors.Wrap(err, "create snapshot")}, nil, nil} + return apiFuncResult{nil, &apiError{errorInternal, fmt.Errorf("create snapshot: %w", err)}, nil, nil} } return apiFuncResult{struct { @@ -1805,7 +1803,7 @@ func parseTimeParam(r *http.Request, paramName string, defaultValue time.Time) ( } result, err := parseTime(val) if err != nil { - return time.Time{}, errors.Wrapf(err, "Invalid time value for '%s'", paramName) + return time.Time{}, fmt.Errorf("Invalid time value for '%s': %w", paramName, err) } return result, nil } @@ -1830,21 +1828,21 @@ func parseTime(s string) (time.Time, error) { case maxTimeFormatted: return MaxTime, nil } - return time.Time{}, errors.Errorf("cannot parse %q to a valid timestamp", s) + return time.Time{}, fmt.Errorf("cannot parse %q to a valid timestamp", s) } func parseDuration(s string) (time.Duration, error) { if d, err := strconv.ParseFloat(s, 64); err == nil { ts := d * float64(time.Second) if ts > float64(math.MaxInt64) || ts < float64(math.MinInt64) { - return 0, errors.Errorf("cannot parse %q to a valid duration. It overflows int64", s) + return 0, fmt.Errorf("cannot parse %q to a valid duration. 
It overflows int64", s) } return time.Duration(ts), nil } if d, err := model.ParseDuration(s); err == nil { return time.Duration(d), nil } - return 0, errors.Errorf("cannot parse %q to a valid duration", s) + return 0, fmt.Errorf("cannot parse %q to a valid duration", s) } func parseMatchersParam(matchers []string) ([][]*labels.Matcher, error) { diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 320d174fce..a5dd8640b3 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -16,6 +16,7 @@ package v1 import ( "context" "encoding/json" + "errors" "fmt" "io" "net/http" @@ -33,7 +34,6 @@ import ( "github.com/prometheus/prometheus/util/stats" "github.com/go-kit/log" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -2974,7 +2974,7 @@ func (f *fakeDB) WALReplayStatus() (tsdb.WALReplayStatus, error) { } func TestAdminEndpoints(t *testing.T) { - tsdb, tsdbWithError, tsdbNotReady := &fakeDB{}, &fakeDB{err: errors.New("some error")}, &fakeDB{err: errors.Wrap(tsdb.ErrNotReady, "wrap")} + tsdb, tsdbWithError, tsdbNotReady := &fakeDB{}, &fakeDB{err: errors.New("some error")}, &fakeDB{err: fmt.Errorf("wrap: %w", tsdb.ErrNotReady)} snapshotAPI := func(api *API) apiFunc { return api.snapshot } cleanAPI := func(api *API) apiFunc { return api.cleanTombstones } deleteAPI := func(api *API) apiFunc { return api.deleteSeries } @@ -3354,7 +3354,7 @@ func TestParseTimeParam(t *testing.T) { asTime: time.Time{}, asError: func() error { _, err := parseTime("baz") - return errors.Wrapf(err, "Invalid time value for '%s'", "foo") + return fmt.Errorf("Invalid time value for '%s': %w", "foo", err) }, }, }, diff --git a/web/api/v1/errors_test.go b/web/api/v1/errors_test.go index 38ca5b62c0..b6ec7d4e1f 100644 --- a/web/api/v1/errors_test.go +++ b/web/api/v1/errors_test.go @@ -15,6 +15,7 @@ package v1 import ( "context" + "errors" "fmt" "net/http" 
"net/http/httptest" @@ -24,7 +25,6 @@ import ( "github.com/go-kit/log" "github.com/grafana/regexp" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/route" "github.com/stretchr/testify/require" diff --git a/web/federate.go b/web/federate.go index babc97e55d..2b79d00532 100644 --- a/web/federate.go +++ b/web/federate.go @@ -14,6 +14,7 @@ package web import ( + "errors" "fmt" "net/http" "sort" @@ -21,7 +22,6 @@ import ( "github.com/go-kit/log/level" "github.com/gogo/protobuf/proto" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" @@ -86,7 +86,7 @@ func (h *Handler) federation(w http.ResponseWriter, req *http.Request) { q, err := h.localStorage.Querier(mint, maxt) if err != nil { federationErrors.Inc() - if errors.Cause(err) == tsdb.ErrNotReady { + if errors.Is(err, tsdb.ErrNotReady) { http.Error(w, err.Error(), http.StatusServiceUnavailable) return } diff --git a/web/federate_test.go b/web/federate_test.go index ab93dcf281..80539861d9 100644 --- a/web/federate_test.go +++ b/web/federate_test.go @@ -16,6 +16,7 @@ package web import ( "bytes" "context" + "errors" "fmt" "io" "net/http" @@ -25,7 +26,6 @@ import ( "testing" "time" - "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -238,15 +238,15 @@ type notReadyReadStorage struct { } func (notReadyReadStorage) Querier(int64, int64) (storage.Querier, error) { - return nil, errors.Wrap(tsdb.ErrNotReady, "wrap") + return nil, fmt.Errorf("wrap: %w", tsdb.ErrNotReady) } func (notReadyReadStorage) StartTime() (int64, error) { - return 0, errors.Wrap(tsdb.ErrNotReady, "wrap") + return 0, fmt.Errorf("wrap: %w", tsdb.ErrNotReady) } func (notReadyReadStorage) Stats(string, int) (*tsdb.Stats, error) { - return nil, errors.Wrap(tsdb.ErrNotReady, "wrap") + return nil, fmt.Errorf("wrap: %w", tsdb.ErrNotReady) } // 
Regression test for https://github.com/prometheus/prometheus/issues/7181. @@ -396,7 +396,7 @@ func TestFederationWithNativeHistograms(t *testing.T) { l := labels.Labels{} for { et, err := p.Next() - if err == io.EOF { + if err != nil && errors.Is(err, io.EOF) { break } require.NoError(t, err) diff --git a/web/web.go b/web/web.go index ccf97805a4..43b79c235d 100644 --- a/web/web.go +++ b/web/web.go @@ -40,7 +40,6 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/regexp" "github.com/mwitkow/go-conntrack" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" io_prometheus_client "github.com/prometheus/client_model/go" @@ -732,7 +731,7 @@ func (h *Handler) runtimeInfo() (api_v1.RuntimeInfo, error) { metrics, err := h.gatherer.Gather() if err != nil { - return status, errors.Errorf("error gathering runtime status: %s", err) + return status, fmt.Errorf("error gathering runtime status: %w", err) } for _, mF := range metrics { switch *mF.Name { From 724737006dc3122dc7487741159d24cf84b1c76e Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Wed, 8 Nov 2023 09:22:31 +0100 Subject: [PATCH 19/66] tsdb/agent: use Go standard errors package Signed-off-by: Matthieu MOREL Signed-off-by: Matthieu MOREL --- tsdb/agent/db.go | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index e4d44afa27..66861a487c 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -15,6 +15,7 @@ package agent import ( "context" + "errors" "fmt" "math" "path/filepath" @@ -24,7 +25,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "go.uber.org/atomic" @@ -263,7 +263,7 @@ func Open(l log.Logger, reg prometheus.Registerer, rs *remote.Storage, dir strin w, err := wlog.NewSize(l, reg, dir, 
opts.WALSegmentSize, opts.WALCompression) if err != nil { - return nil, errors.Wrap(err, "creating WAL") + return nil, fmt.Errorf("creating WAL: %w", err) } db := &DB{ @@ -302,7 +302,7 @@ func Open(l log.Logger, reg prometheus.Registerer, rs *remote.Storage, dir strin if err := db.replayWAL(); err != nil { level.Warn(db.logger).Log("msg", "encountered WAL read error, attempting repair", "err", err) if err := w.Repair(err); err != nil { - return nil, errors.Wrap(err, "repair corrupted WAL") + return nil, fmt.Errorf("repair corrupted WAL: %w", err) } level.Info(db.logger).Log("msg", "successfully repaired WAL") } @@ -352,7 +352,7 @@ func (db *DB) replayWAL() error { dir, startFrom, err := wlog.LastCheckpoint(db.wal.Dir()) if err != nil && err != record.ErrNotFound { - return errors.Wrap(err, "find last checkpoint") + return fmt.Errorf("find last checkpoint: %w", err) } multiRef := map[chunks.HeadSeriesRef]chunks.HeadSeriesRef{} @@ -360,7 +360,7 @@ func (db *DB) replayWAL() error { if err == nil { sr, err := wlog.NewSegmentsReader(dir) if err != nil { - return errors.Wrap(err, "open checkpoint") + return fmt.Errorf("open checkpoint: %w", err) } defer func() { if err := sr.Close(); err != nil { @@ -371,7 +371,7 @@ func (db *DB) replayWAL() error { // A corrupted checkpoint is a hard error for now and requires user // intervention. There's likely little data that can be recovered anyway. if err := db.loadWAL(wlog.NewReader(sr), multiRef); err != nil { - return errors.Wrap(err, "backfill checkpoint") + return fmt.Errorf("backfill checkpoint: %w", err) } startFrom++ level.Info(db.logger).Log("msg", "WAL checkpoint loaded") @@ -380,14 +380,14 @@ func (db *DB) replayWAL() error { // Find the last segment. _, last, err := wlog.Segments(db.wal.Dir()) if err != nil { - return errors.Wrap(err, "finding WAL segments") + return fmt.Errorf("finding WAL segments: %w", err) } // Backfil segments from the most recent checkpoint onwards. 
for i := startFrom; i <= last; i++ { seg, err := wlog.OpenReadSegment(wlog.SegmentName(db.wal.Dir(), i)) if err != nil { - return errors.Wrap(err, fmt.Sprintf("open WAL segment: %d", i)) + return fmt.Errorf("open WAL segment: %d: %w", i, err) } sr := wlog.NewSegmentBufReader(seg) @@ -432,7 +432,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H series, err = dec.Series(rec, series) if err != nil { errCh <- &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode series"), + Err: fmt.Errorf("decode series: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -444,7 +444,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H samples, err = dec.Samples(rec, samples) if err != nil { errCh <- &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode samples"), + Err: fmt.Errorf("decode samples: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -456,7 +456,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H histograms, err = dec.HistogramSamples(rec, histograms) if err != nil { errCh <- &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode histogram samples"), + Err: fmt.Errorf("decode histogram samples: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -468,7 +468,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H floatHistograms, err = dec.FloatHistogramSamples(rec, floatHistograms) if err != nil { errCh <- &wlog.CorruptionErr{ - Err: errors.Wrap(err, "decode float histogram samples"), + Err: fmt.Errorf("decode float histogram samples: %w", err), Segment: r.Segment(), Offset: r.Offset(), } @@ -482,7 +482,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H continue default: errCh <- &wlog.CorruptionErr{ - Err: errors.Errorf("invalid record type %v", dec.Type(rec)), + Err: fmt.Errorf("invalid record type %v", dec.Type(rec)), Segment: r.Segment(), Offset: r.Offset(), } @@ -568,7 +568,7 @@ func 
(db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H return err default: if r.Err() != nil { - return errors.Wrap(r.Err(), "read records") + return fmt.Errorf("read records: %w", r.Err()) } return nil } @@ -622,13 +622,13 @@ func (db *DB) truncate(mint int64) error { first, last, err := wlog.Segments(db.wal.Dir()) if err != nil { - return errors.Wrap(err, "get segment range") + return fmt.Errorf("get segment range: %w", err) } // Start a new segment so low ingestion volume instances don't have more WAL // than needed. if _, err := db.wal.NextSegment(); err != nil { - return errors.Wrap(err, "next segment") + return fmt.Errorf("next segment: %w", err) } last-- // Never consider most recent segment for checkpoint @@ -656,10 +656,11 @@ func (db *DB) truncate(mint int64) error { if _, err = wlog.Checkpoint(db.logger, db.wal, first, last, keep, mint); err != nil { db.metrics.checkpointCreationFail.Inc() - if _, ok := errors.Cause(err).(*wlog.CorruptionErr); ok { + var cerr *wlog.CorruptionErr + if errors.As(err, &cerr) { db.metrics.walCorruptionsTotal.Inc() } - return errors.Wrap(err, "create checkpoint") + return fmt.Errorf("create checkpoint: %w", err) } if err := db.wal.Truncate(last + 1); err != nil { // If truncating fails, we'll just try it again at the next checkpoint. @@ -780,11 +781,11 @@ func (a *appender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v flo // equivalent validation code in the TSDB's headAppender. 
l = l.WithoutEmpty() if l.IsEmpty() { - return 0, errors.Wrap(tsdb.ErrInvalidSample, "empty labelset") + return 0, fmt.Errorf("empty labelset: %w", tsdb.ErrInvalidSample) } if lbl, dup := l.HasDuplicateLabelNames(); dup { - return 0, errors.Wrap(tsdb.ErrInvalidSample, fmt.Sprintf(`label name "%s" is not unique`, lbl)) + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidSample) } var created bool @@ -841,7 +842,7 @@ func (a *appender) AppendExemplar(ref storage.SeriesRef, _ labels.Labels, e exem e.Labels = e.Labels.WithoutEmpty() if lbl, dup := e.Labels.HasDuplicateLabelNames(); dup { - return 0, errors.Wrap(tsdb.ErrInvalidExemplar, fmt.Sprintf(`label name "%s" is not unique`, lbl)) + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidExemplar) } // Exemplar label length does not include chars involved in text rendering such as quotes @@ -903,11 +904,11 @@ func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int // equivalent validation code in the TSDB's headAppender. 
l = l.WithoutEmpty() if l.IsEmpty() { - return 0, errors.Wrap(tsdb.ErrInvalidSample, "empty labelset") + return 0, fmt.Errorf("empty labelset: %w", tsdb.ErrInvalidSample) } if lbl, dup := l.HasDuplicateLabelNames(); dup { - return 0, errors.Wrap(tsdb.ErrInvalidSample, fmt.Sprintf(`label name "%s" is not unique`, lbl)) + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidSample) } var created bool From b60f9f801e7c4f13d6ed041106a7540ddf80edd6 Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Wed, 8 Nov 2023 09:35:46 +0100 Subject: [PATCH 20/66] tsdb/chunkenc: use Go standard errors package Signed-off-by: Matthieu MOREL --- tsdb/chunkenc/chunk.go | 11 +++++------ tsdb/chunkenc/varbit.go | 7 +++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tsdb/chunkenc/chunk.go b/tsdb/chunkenc/chunk.go index e7ff5b165e..f4d11986c4 100644 --- a/tsdb/chunkenc/chunk.go +++ b/tsdb/chunkenc/chunk.go @@ -14,11 +14,10 @@ package chunkenc import ( + "fmt" "math" "sync" - "github.com/pkg/errors" - "github.com/prometheus/prometheus/model/histogram" ) @@ -293,7 +292,7 @@ func (p *pool) Get(e Encoding, b []byte) (Chunk, error) { c.b.count = 0 return c, nil } - return nil, errors.Errorf("invalid chunk encoding %q", e) + return nil, fmt.Errorf("invalid chunk encoding %q", e) } func (p *pool) Put(c Chunk) error { @@ -332,7 +331,7 @@ func (p *pool) Put(c Chunk) error { sh.b.count = 0 p.floatHistogram.Put(c) default: - return errors.Errorf("invalid chunk encoding %q", c.Encoding()) + return fmt.Errorf("invalid chunk encoding %q", c.Encoding()) } return nil } @@ -349,7 +348,7 @@ func FromData(e Encoding, d []byte) (Chunk, error) { case EncFloatHistogram: return &FloatHistogramChunk{b: bstream{count: 0, stream: d}}, nil } - return nil, errors.Errorf("invalid chunk encoding %q", e) + return nil, fmt.Errorf("invalid chunk encoding %q", e) } // NewEmptyChunk returns an empty chunk for the given encoding. 
@@ -362,5 +361,5 @@ func NewEmptyChunk(e Encoding) (Chunk, error) { case EncFloatHistogram: return NewFloatHistogramChunk(), nil } - return nil, errors.Errorf("invalid chunk encoding %q", e) + return nil, fmt.Errorf("invalid chunk encoding %q", e) } diff --git a/tsdb/chunkenc/varbit.go b/tsdb/chunkenc/varbit.go index 449f9fbac2..b43574dcb6 100644 --- a/tsdb/chunkenc/varbit.go +++ b/tsdb/chunkenc/varbit.go @@ -14,9 +14,8 @@ package chunkenc import ( + "fmt" "math/bits" - - "github.com/pkg/errors" ) // putVarbitInt writes an int64 using varbit encoding with a bit bucketing @@ -109,7 +108,7 @@ func readVarbitInt(b *bstreamReader) (int64, error) { val = int64(bits) default: - return 0, errors.Errorf("invalid bit pattern %b", d) + return 0, fmt.Errorf("invalid bit pattern %b", d) } if sz != 0 { @@ -215,7 +214,7 @@ func readVarbitUint(b *bstreamReader) (uint64, error) { return 0, err } default: - return 0, errors.Errorf("invalid bit pattern %b", d) + return 0, fmt.Errorf("invalid bit pattern %b", d) } if sz != 0 { From ece8286305ce1bd9feca8632c3db28c7c38a5093 Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Wed, 8 Nov 2023 10:02:59 +0100 Subject: [PATCH 21/66] tsdb/chunk: use Go standard errors package Signed-off-by: Matthieu MOREL --- tsdb/chunks/chunks.go | 38 ++++++++++++-------------- tsdb/chunks/head_chunks.go | 56 +++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 48 deletions(-) diff --git a/tsdb/chunks/chunks.go b/tsdb/chunks/chunks.go index 2d5fba7335..c4c4e3c933 100644 --- a/tsdb/chunks/chunks.go +++ b/tsdb/chunks/chunks.go @@ -24,8 +24,6 @@ import ( "path/filepath" "strconv" - "github.com/pkg/errors" - "github.com/prometheus/prometheus/tsdb/chunkenc" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" "github.com/prometheus/prometheus/tsdb/fileutil" @@ -285,7 +283,7 @@ func checkCRC32(data, sum []byte) error { // This combination of shifts is the inverse of digest.Sum() in go/src/hash/crc32. 
want := uint32(sum[0])<<24 + uint32(sum[1])<<16 + uint32(sum[2])<<8 + uint32(sum[3]) if got != want { - return errors.Errorf("checksum mismatch expected:%x, actual:%x", want, got) + return fmt.Errorf("checksum mismatch expected:%x, actual:%x", want, got) } return nil } @@ -398,12 +396,12 @@ func (w *Writer) cut() error { func cutSegmentFile(dirFile *os.File, magicNumber uint32, chunksFormat byte, allocSize int64) (headerSize int, newFile *os.File, seq int, returnErr error) { p, seq, err := nextSequenceFile(dirFile.Name()) if err != nil { - return 0, nil, 0, errors.Wrap(err, "next sequence file") + return 0, nil, 0, fmt.Errorf("next sequence file: %w", err) } ptmp := p + ".tmp" f, err := os.OpenFile(ptmp, os.O_WRONLY|os.O_CREATE, 0o666) if err != nil { - return 0, nil, 0, errors.Wrap(err, "open temp file") + return 0, nil, 0, fmt.Errorf("open temp file: %w", err) } defer func() { if returnErr != nil { @@ -418,11 +416,11 @@ func cutSegmentFile(dirFile *os.File, magicNumber uint32, chunksFormat byte, all }() if allocSize > 0 { if err = fileutil.Preallocate(f, allocSize, true); err != nil { - return 0, nil, 0, errors.Wrap(err, "preallocate") + return 0, nil, 0, fmt.Errorf("preallocate: %w", err) } } if err = dirFile.Sync(); err != nil { - return 0, nil, 0, errors.Wrap(err, "sync directory") + return 0, nil, 0, fmt.Errorf("sync directory: %w", err) } // Write header metadata for new file. 
@@ -432,24 +430,24 @@ func cutSegmentFile(dirFile *os.File, magicNumber uint32, chunksFormat byte, all n, err := f.Write(metab) if err != nil { - return 0, nil, 0, errors.Wrap(err, "write header") + return 0, nil, 0, fmt.Errorf("write header: %w", err) } if err := f.Close(); err != nil { - return 0, nil, 0, errors.Wrap(err, "close temp file") + return 0, nil, 0, fmt.Errorf("close temp file: %w", err) } f = nil if err := fileutil.Rename(ptmp, p); err != nil { - return 0, nil, 0, errors.Wrap(err, "replace file") + return 0, nil, 0, fmt.Errorf("replace file: %w", err) } f, err = os.OpenFile(p, os.O_WRONLY, 0o666) if err != nil { - return 0, nil, 0, errors.Wrap(err, "open final file") + return 0, nil, 0, fmt.Errorf("open final file: %w", err) } // Skip header for further writes. if _, err := f.Seek(int64(n), 0); err != nil { - return 0, nil, 0, errors.Wrap(err, "seek in final file") + return 0, nil, 0, fmt.Errorf("seek in final file: %w", err) } return n, f, seq, nil } @@ -606,16 +604,16 @@ func newReader(bs []ByteSlice, cs []io.Closer, pool chunkenc.Pool) (*Reader, err cr := Reader{pool: pool, bs: bs, cs: cs} for i, b := range cr.bs { if b.Len() < SegmentHeaderSize { - return nil, errors.Wrapf(errInvalidSize, "invalid segment header in segment %d", i) + return nil, fmt.Errorf("invalid segment header in segment %d: %w", i, errInvalidSize) } // Verify magic number. if m := binary.BigEndian.Uint32(b.Range(0, MagicChunksSize)); m != MagicChunks { - return nil, errors.Errorf("invalid magic number %x", m) + return nil, fmt.Errorf("invalid magic number %x", m) } // Verify chunk format version. 
if v := int(b.Range(MagicChunksSize, MagicChunksSize+ChunksFormatVersionSize)[0]); v != chunksFormatV1 { - return nil, errors.Errorf("invalid chunk format version %d", v) + return nil, fmt.Errorf("invalid chunk format version %d", v) } cr.size += int64(b.Len()) } @@ -641,7 +639,7 @@ func NewDirReader(dir string, pool chunkenc.Pool) (*Reader, error) { f, err := fileutil.OpenMmapFile(fn) if err != nil { return nil, tsdb_errors.NewMulti( - errors.Wrap(err, "mmap files"), + fmt.Errorf("mmap files: %w", err), tsdb_errors.CloseAll(cs), ).Err() } @@ -673,20 +671,20 @@ func (s *Reader) Chunk(meta Meta) (chunkenc.Chunk, error) { sgmIndex, chkStart := BlockChunkRef(meta.Ref).Unpack() if sgmIndex >= len(s.bs) { - return nil, errors.Errorf("segment index %d out of range", sgmIndex) + return nil, fmt.Errorf("segment index %d out of range", sgmIndex) } sgmBytes := s.bs[sgmIndex] if chkStart+MaxChunkLengthFieldSize > sgmBytes.Len() { - return nil, errors.Errorf("segment doesn't include enough bytes to read the chunk size data field - required:%v, available:%v", chkStart+MaxChunkLengthFieldSize, sgmBytes.Len()) + return nil, fmt.Errorf("segment doesn't include enough bytes to read the chunk size data field - required:%v, available:%v", chkStart+MaxChunkLengthFieldSize, sgmBytes.Len()) } // With the minimum chunk length this should never cause us reading // over the end of the slice. 
c := sgmBytes.Range(chkStart, chkStart+MaxChunkLengthFieldSize) chkDataLen, n := binary.Uvarint(c) if n <= 0 { - return nil, errors.Errorf("reading chunk length failed with %d", n) + return nil, fmt.Errorf("reading chunk length failed with %d", n) } chkEncStart := chkStart + n @@ -695,7 +693,7 @@ func (s *Reader) Chunk(meta Meta) (chunkenc.Chunk, error) { chkDataEnd := chkEnd - crc32.Size if chkEnd > sgmBytes.Len() { - return nil, errors.Errorf("segment doesn't include enough bytes to read the chunk - required:%v, available:%v", chkEnd, sgmBytes.Len()) + return nil, fmt.Errorf("segment doesn't include enough bytes to read the chunk - required:%v, available:%v", chkEnd, sgmBytes.Len()) } sum := sgmBytes.Range(chkDataEnd, chkEnd) diff --git a/tsdb/chunks/head_chunks.go b/tsdb/chunks/head_chunks.go index d73eb36f87..b495b61828 100644 --- a/tsdb/chunks/head_chunks.go +++ b/tsdb/chunks/head_chunks.go @@ -17,6 +17,8 @@ import ( "bufio" "bytes" "encoding/binary" + "errors" + "fmt" "hash" "io" "os" @@ -25,7 +27,6 @@ import ( "sync" "github.com/dennwc/varint" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "go.uber.org/atomic" "golang.org/x/exp/slices" @@ -107,7 +108,7 @@ type CorruptionErr struct { } func (e *CorruptionErr) Error() string { - return errors.Wrapf(e.Err, "corruption in head chunk file %s", segmentFile(e.Dir, e.FileIndex)).Error() + return fmt.Errorf("corruption in head chunk file %s: %w", segmentFile(e.Dir, e.FileIndex), e.Err).Error() } // chunkPos keeps track of the position in the head chunk files. @@ -240,10 +241,10 @@ type mmappedChunkFile struct { func NewChunkDiskMapper(reg prometheus.Registerer, dir string, pool chunkenc.Pool, writeBufferSize, writeQueueSize int) (*ChunkDiskMapper, error) { // Validate write buffer size. 
if writeBufferSize < MinWriteBufferSize || writeBufferSize > MaxWriteBufferSize { - return nil, errors.Errorf("ChunkDiskMapper write buffer size should be between %d and %d (actual: %d)", MinWriteBufferSize, MaxWriteBufferSize, writeBufferSize) + return nil, fmt.Errorf("ChunkDiskMapper write buffer size should be between %d and %d (actual: %d)", MinWriteBufferSize, MaxWriteBufferSize, writeBufferSize) } if writeBufferSize%1024 != 0 { - return nil, errors.Errorf("ChunkDiskMapper write buffer size should be a multiple of 1024 (actual: %d)", writeBufferSize) + return nil, fmt.Errorf("ChunkDiskMapper write buffer size should be a multiple of 1024 (actual: %d)", writeBufferSize) } if err := os.MkdirAll(dir, 0o777); err != nil { @@ -320,7 +321,7 @@ func (cdm *ChunkDiskMapper) openMMapFiles() (returnErr error) { for seq, fn := range files { f, err := fileutil.OpenMmapFile(fn) if err != nil { - return errors.Wrapf(err, "mmap files, file: %s", fn) + return fmt.Errorf("mmap files, file: %s: %w", fn, err) } cdm.closers[seq] = f cdm.mmappedChunkFiles[seq] = &mmappedChunkFile{byteSlice: realByteSlice(f.Bytes())} @@ -335,23 +336,23 @@ func (cdm *ChunkDiskMapper) openMMapFiles() (returnErr error) { lastSeq := chkFileIndices[0] for _, seq := range chkFileIndices[1:] { if seq != lastSeq+1 { - return errors.Errorf("found unsequential head chunk files %s (index: %d) and %s (index: %d)", files[lastSeq], lastSeq, files[seq], seq) + return fmt.Errorf("found unsequential head chunk files %s (index: %d) and %s (index: %d)", files[lastSeq], lastSeq, files[seq], seq) } lastSeq = seq } for i, b := range cdm.mmappedChunkFiles { if b.byteSlice.Len() < HeadChunkFileHeaderSize { - return errors.Wrapf(errInvalidSize, "%s: invalid head chunk file header", files[i]) + return fmt.Errorf("%s: invalid head chunk file header: %w", files[i], errInvalidSize) } // Verify magic number. 
if m := binary.BigEndian.Uint32(b.byteSlice.Range(0, MagicChunksSize)); m != MagicHeadChunks { - return errors.Errorf("%s: invalid magic number %x", files[i], m) + return fmt.Errorf("%s: invalid magic number %x", files[i], m) } // Verify chunk format version. if v := int(b.byteSlice.Range(MagicChunksSize, MagicChunksSize+ChunksFormatVersionSize)[0]); v != chunksFormatV1 { - return errors.Errorf("%s: invalid chunk format version %d", files[i], v) + return fmt.Errorf("%s: invalid chunk format version %d", files[i], v) } } @@ -394,16 +395,16 @@ func repairLastChunkFile(files map[int]string) (_ map[int]string, returnErr erro f, err := os.Open(files[lastFile]) if err != nil { - return files, errors.Wrap(err, "open file during last head chunk file repair") + return files, fmt.Errorf("open file during last head chunk file repair: %w", err) } buf := make([]byte, MagicChunksSize) size, err := f.Read(buf) if err != nil && err != io.EOF { - return files, errors.Wrap(err, "failed to read magic number during last head chunk file repair") + return files, fmt.Errorf("failed to read magic number during last head chunk file repair: %w", err) } if err := f.Close(); err != nil { - return files, errors.Wrap(err, "close file during last head chunk file repair") + return files, fmt.Errorf("close file during last head chunk file repair: %w", err) } // We either don't have enough bytes for the magic number or the magic number is 0. @@ -413,7 +414,7 @@ func repairLastChunkFile(files map[int]string) (_ map[int]string, returnErr erro if size < MagicChunksSize || binary.BigEndian.Uint32(buf) == 0 { // Corrupt file, hence remove it. 
if err := os.RemoveAll(files[lastFile]); err != nil { - return files, errors.Wrap(err, "delete corrupted, empty head chunk file during last file repair") + return files, fmt.Errorf("delete corrupted, empty head chunk file during last file repair: %w", err) } delete(files, lastFile) } @@ -559,7 +560,7 @@ func (cdm *ChunkDiskMapper) cutAndExpectRef(chkRef ChunkDiskMapperRef) (err erro } if expSeq, expOffset := chkRef.Unpack(); seq != expSeq || offset != expOffset { - return errors.Errorf("expected newly cut file to have sequence:offset %d:%d, got %d:%d", expSeq, expOffset, seq, offset) + return fmt.Errorf("expected newly cut file to have sequence:offset %d:%d, got %d:%d", expSeq, expOffset, seq, offset) } return nil @@ -701,13 +702,13 @@ func (cdm *ChunkDiskMapper) Chunk(ref ChunkDiskMapperRef) (chunkenc.Chunk, error return nil, &CorruptionErr{ Dir: cdm.dir.Name(), FileIndex: -1, - Err: errors.Errorf("head chunk file index %d more than current open file", sgmIndex), + Err: fmt.Errorf("head chunk file index %d more than current open file", sgmIndex), } } return nil, &CorruptionErr{ Dir: cdm.dir.Name(), FileIndex: sgmIndex, - Err: errors.Errorf("head chunk file index %d does not exist on disk", sgmIndex), + Err: fmt.Errorf("head chunk file index %d does not exist on disk", sgmIndex), } } @@ -715,7 +716,7 @@ func (cdm *ChunkDiskMapper) Chunk(ref ChunkDiskMapperRef) (chunkenc.Chunk, error return nil, &CorruptionErr{ Dir: cdm.dir.Name(), FileIndex: sgmIndex, - Err: errors.Errorf("head chunk file doesn't include enough bytes to read the chunk size data field - required:%v, available:%v", chkStart+MaxChunkLengthFieldSize, mmapFile.byteSlice.Len()), + Err: fmt.Errorf("head chunk file doesn't include enough bytes to read the chunk size data field - required:%v, available:%v", chkStart+MaxChunkLengthFieldSize, mmapFile.byteSlice.Len()), } } @@ -734,7 +735,7 @@ func (cdm *ChunkDiskMapper) Chunk(ref ChunkDiskMapperRef) (chunkenc.Chunk, error return nil, &CorruptionErr{ Dir: 
cdm.dir.Name(), FileIndex: sgmIndex, - Err: errors.Errorf("reading chunk length failed with %d", n), + Err: fmt.Errorf("reading chunk length failed with %d", n), } } @@ -744,7 +745,7 @@ func (cdm *ChunkDiskMapper) Chunk(ref ChunkDiskMapperRef) (chunkenc.Chunk, error return nil, &CorruptionErr{ Dir: cdm.dir.Name(), FileIndex: sgmIndex, - Err: errors.Errorf("head chunk file doesn't include enough bytes to read the chunk - required:%v, available:%v", chkDataEnd, mmapFile.byteSlice.Len()), + Err: fmt.Errorf("head chunk file doesn't include enough bytes to read the chunk - required:%v, available:%v", chkDataEnd, mmapFile.byteSlice.Len()), } } @@ -761,7 +762,7 @@ func (cdm *ChunkDiskMapper) Chunk(ref ChunkDiskMapperRef) (chunkenc.Chunk, error return nil, &CorruptionErr{ Dir: cdm.dir.Name(), FileIndex: sgmIndex, - Err: errors.Errorf("checksum mismatch expected:%x, actual:%x", sum, act), + Err: fmt.Errorf("checksum mismatch expected:%x, actual:%x", sum, act), } } @@ -829,7 +830,7 @@ func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chu return &CorruptionErr{ Dir: cdm.dir.Name(), FileIndex: segID, - Err: errors.Errorf("head chunk file has some unread data, but doesn't include enough bytes to read the chunk header"+ + Err: fmt.Errorf("head chunk file has some unread data, but doesn't include enough bytes to read the chunk header"+ " - required:%v, available:%v, file:%d", idx+MaxHeadChunkMetaSize, fileEnd, segID), } } @@ -866,7 +867,7 @@ func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chu return &CorruptionErr{ Dir: cdm.dir.Name(), FileIndex: segID, - Err: errors.Errorf("head chunk file doesn't include enough bytes to read the chunk header - required:%v, available:%v, file:%d", idx+CRCSize, fileEnd, segID), + Err: fmt.Errorf("head chunk file doesn't include enough bytes to read the chunk header - required:%v, available:%v, file:%d", idx+CRCSize, fileEnd, segID), } } @@ -879,7 +880,7 @@ func (cdm *ChunkDiskMapper) 
IterateAllChunks(f func(seriesRef HeadSeriesRef, chu return &CorruptionErr{ Dir: cdm.dir.Name(), FileIndex: segID, - Err: errors.Errorf("checksum mismatch expected:%x, actual:%x", sum, act), + Err: fmt.Errorf("checksum mismatch expected:%x, actual:%x", sum, act), } } idx += CRCSize @@ -905,7 +906,7 @@ func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chu return &CorruptionErr{ Dir: cdm.dir.Name(), FileIndex: segID, - Err: errors.Errorf("head chunk file doesn't include enough bytes to read the last chunk data - required:%v, available:%v, file:%d", idx, fileEnd, segID), + Err: fmt.Errorf("head chunk file doesn't include enough bytes to read the last chunk data - required:%v, available:%v, file:%d", idx, fileEnd, segID), } } } @@ -998,10 +999,9 @@ func (cdm *ChunkDiskMapper) deleteFiles(removedFiles []int) ([]int, error) { // DeleteCorrupted deletes all the head chunk files after the one which had the corruption // (including the corrupt file). func (cdm *ChunkDiskMapper) DeleteCorrupted(originalErr error) error { - err := errors.Cause(originalErr) // So that we can pick up errors even if wrapped. - cerr, ok := err.(*CorruptionErr) - if !ok { - return errors.Wrap(originalErr, "cannot handle error") + var cerr *CorruptionErr + if !errors.As(originalErr, &cerr) { + return fmt.Errorf("cannot handle error: %w", originalErr) } // Delete all the head chunk files following the corrupt head chunk file. From ae9221e152c01daba1de8ddea4c01f9f6e86451c Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 8 Nov 2023 13:08:33 +0100 Subject: [PATCH 22/66] tsdb/index.Symbols: Drop context argument from Lookup method (#13058) Drop context argument from tsdb/index.Symbols.Lookup since lookup should be fast and the context checking is a performance hit. 
Signed-off-by: Arve Knudsen --- tsdb/index/index.go | 9 +++------ tsdb/index/index_test.go | 5 ++--- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 0eb0d1434a..893167c250 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -923,7 +923,7 @@ func (w *Writer) writePostingsToTmpFiles() error { // Symbol numbers are in order, so the strings will also be in order. slices.Sort(values) for _, v := range values { - value, err := w.symbols.Lookup(w.ctx, v) + value, err := w.symbols.Lookup(v) if err != nil { return err } @@ -1295,7 +1295,7 @@ func NewSymbols(bs ByteSlice, version, off int) (*Symbols, error) { return s, nil } -func (s Symbols) Lookup(ctx context.Context, o uint32) (string, error) { +func (s Symbols) Lookup(o uint32) (string, error) { d := encoding.Decbuf{ B: s.bs.Range(0, s.bs.Len()), } @@ -1307,9 +1307,6 @@ func (s Symbols) Lookup(ctx context.Context, o uint32) (string, error) { d.Skip(s.offsets[int(o/symbolFactor)]) // Walk until we find the one we want. for i := o - (o / symbolFactor * symbolFactor); i > 0; i-- { - if ctx.Err() != nil { - return "", ctx.Err() - } d.UvarintBytes() } } else { @@ -1441,7 +1438,7 @@ func (r *Reader) lookupSymbol(ctx context.Context, o uint32) (string, error) { if s, ok := r.nameSymbols[o]; ok { return s, nil } - return r.symbols.Lookup(ctx, o) + return r.symbols.Lookup(o) } // Symbols returns an iterator over the symbols that exist within the index. diff --git a/tsdb/index/index_test.go b/tsdb/index/index_test.go index abc1477976..7a6683da2b 100644 --- a/tsdb/index/index_test.go +++ b/tsdb/index/index_test.go @@ -519,7 +519,6 @@ func TestNewFileReaderErrorNoOpenFiles(t *testing.T) { } func TestSymbols(t *testing.T) { - ctx := context.Background() buf := encoding.Encbuf{} // Add prefix to the buffer to simulate symbols as part of larger buffer. 
@@ -542,11 +541,11 @@ func TestSymbols(t *testing.T) { require.Equal(t, 32, s.Size()) for i := 99; i >= 0; i-- { - s, err := s.Lookup(ctx, uint32(i)) + s, err := s.Lookup(uint32(i)) require.NoError(t, err) require.Equal(t, string(rune(i)), s) } - _, err = s.Lookup(ctx, 100) + _, err = s.Lookup(100) require.Error(t, err) for i := 99; i >= 0; i-- { From ab2a7bb74fa32f9199e3b9d4a19c000c304a37e9 Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Wed, 8 Nov 2023 21:43:05 +0800 Subject: [PATCH 23/66] add generic shrink function (#13001) Add `ReduceResolution` method to `Histogram` and `FloatHistogram` This takes the original `mergeToSchema` function and turns it into a more generic `reduceResolution` function, which is the building block for the new methods. The methods will help with addressing #12864. --------- Signed-off-by: Ziqi Zhao --- model/histogram/float_histogram.go | 98 ++++++------------------------ model/histogram/generic.go | 87 ++++++++++++++++++++++++++ model/histogram/generic_test.go | 70 +++++++++++++++++++++ model/histogram/histogram.go | 12 ++++ 4 files changed, 186 insertions(+), 81 deletions(-) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 22d33f5a4e..212b028800 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -94,8 +94,8 @@ func (h *FloatHistogram) CopyToSchema(targetSchema int32) *FloatHistogram { Sum: h.Sum, } - c.PositiveSpans, c.PositiveBuckets = mergeToSchema(h.PositiveSpans, h.PositiveBuckets, h.Schema, targetSchema) - c.NegativeSpans, c.NegativeBuckets = mergeToSchema(h.NegativeSpans, h.NegativeBuckets, h.Schema, targetSchema) + c.PositiveSpans, c.PositiveBuckets = reduceResolution(h.PositiveSpans, h.PositiveBuckets, h.Schema, targetSchema, false) + c.NegativeSpans, c.NegativeBuckets = reduceResolution(h.NegativeSpans, h.NegativeBuckets, h.Schema, targetSchema, false) return &c } @@ -268,17 +268,12 @@ func (h *FloatHistogram) Add(other *FloatHistogram) 
*FloatHistogram { h.Count += other.Count h.Sum += other.Sum - otherPositiveSpans := other.PositiveSpans - otherPositiveBuckets := other.PositiveBuckets - otherNegativeSpans := other.NegativeSpans - otherNegativeBuckets := other.NegativeBuckets if other.Schema != h.Schema { - otherPositiveSpans, otherPositiveBuckets = mergeToSchema(other.PositiveSpans, other.PositiveBuckets, other.Schema, h.Schema) - otherNegativeSpans, otherNegativeBuckets = mergeToSchema(other.NegativeSpans, other.NegativeBuckets, other.Schema, h.Schema) + other = other.ReduceResolution(h.Schema) } - h.PositiveSpans, h.PositiveBuckets = addBuckets(h.Schema, h.ZeroThreshold, false, h.PositiveSpans, h.PositiveBuckets, otherPositiveSpans, otherPositiveBuckets) - h.NegativeSpans, h.NegativeBuckets = addBuckets(h.Schema, h.ZeroThreshold, false, h.NegativeSpans, h.NegativeBuckets, otherNegativeSpans, otherNegativeBuckets) + h.PositiveSpans, h.PositiveBuckets = addBuckets(h.Schema, h.ZeroThreshold, false, h.PositiveSpans, h.PositiveBuckets, other.PositiveSpans, other.PositiveBuckets) + h.NegativeSpans, h.NegativeBuckets = addBuckets(h.Schema, h.ZeroThreshold, false, h.NegativeSpans, h.NegativeBuckets, other.NegativeSpans, other.NegativeBuckets) return h } @@ -289,17 +284,12 @@ func (h *FloatHistogram) Sub(other *FloatHistogram) *FloatHistogram { h.Count -= other.Count h.Sum -= other.Sum - otherPositiveSpans := other.PositiveSpans - otherPositiveBuckets := other.PositiveBuckets - otherNegativeSpans := other.NegativeSpans - otherNegativeBuckets := other.NegativeBuckets if other.Schema != h.Schema { - otherPositiveSpans, otherPositiveBuckets = mergeToSchema(other.PositiveSpans, other.PositiveBuckets, other.Schema, h.Schema) - otherNegativeSpans, otherNegativeBuckets = mergeToSchema(other.NegativeSpans, other.NegativeBuckets, other.Schema, h.Schema) + other = other.ReduceResolution(h.Schema) } - h.PositiveSpans, h.PositiveBuckets = addBuckets(h.Schema, h.ZeroThreshold, true, h.PositiveSpans, 
h.PositiveBuckets, otherPositiveSpans, otherPositiveBuckets) - h.NegativeSpans, h.NegativeBuckets = addBuckets(h.Schema, h.ZeroThreshold, true, h.NegativeSpans, h.NegativeBuckets, otherNegativeSpans, otherNegativeBuckets) + h.PositiveSpans, h.PositiveBuckets = addBuckets(h.Schema, h.ZeroThreshold, true, h.PositiveSpans, h.PositiveBuckets, other.PositiveSpans, other.PositiveBuckets) + h.NegativeSpans, h.NegativeBuckets = addBuckets(h.Schema, h.ZeroThreshold, true, h.NegativeSpans, h.NegativeBuckets, other.NegativeSpans, other.NegativeBuckets) return h } @@ -975,69 +965,6 @@ func targetIdx(idx, originSchema, targetSchema int32) int32 { return ((idx - 1) >> (originSchema - targetSchema)) + 1 } -// mergeToSchema is used to merge a FloatHistogram's Spans and Buckets (no matter if -// positive or negative) from the original schema to the target schema. -// The target schema must be smaller than the original schema. -func mergeToSchema(originSpans []Span, originBuckets []float64, originSchema, targetSchema int32) ([]Span, []float64) { - var ( - targetSpans []Span // The spans in the target schema. - targetBuckets []float64 // The buckets in the target schema. - bucketIdx int32 // The index of bucket in the origin schema. - lastTargetBucketIdx int32 // The index of the last added target bucket. - origBucketIdx int // The position of a bucket in originBuckets slice. - ) - - for _, span := range originSpans { - // Determine the index of the first bucket in this span. - bucketIdx += span.Offset - for j := 0; j < int(span.Length); j++ { - // Determine the index of the bucket in the target schema from the index in the original schema. - targetBucketIdx := targetIdx(bucketIdx, originSchema, targetSchema) - - switch { - case len(targetSpans) == 0: - // This is the first span in the targetSpans. 
- span := Span{ - Offset: targetBucketIdx, - Length: 1, - } - targetSpans = append(targetSpans, span) - targetBuckets = append(targetBuckets, originBuckets[0]) - lastTargetBucketIdx = targetBucketIdx - - case lastTargetBucketIdx == targetBucketIdx: - // The current bucket has to be merged into the same target bucket as the previous bucket. - targetBuckets[len(targetBuckets)-1] += originBuckets[origBucketIdx] - - case (lastTargetBucketIdx + 1) == targetBucketIdx: - // The current bucket has to go into a new target bucket, - // and that bucket is next to the previous target bucket, - // so we add it to the current target span. - targetSpans[len(targetSpans)-1].Length++ - targetBuckets = append(targetBuckets, originBuckets[origBucketIdx]) - lastTargetBucketIdx++ - - case (lastTargetBucketIdx + 1) < targetBucketIdx: - // The current bucket has to go into a new target bucket, - // and that bucket is separated by a gap from the previous target bucket, - // so we need to add a new target span. - span := Span{ - Offset: targetBucketIdx - lastTargetBucketIdx - 1, - Length: 1, - } - targetSpans = append(targetSpans, span) - targetBuckets = append(targetBuckets, originBuckets[origBucketIdx]) - lastTargetBucketIdx = targetBucketIdx - } - - bucketIdx++ - origBucketIdx++ - } - } - - return targetSpans, targetBuckets -} - // addBuckets adds the buckets described by spansB/bucketsB to the buckets described by spansA/bucketsA, // creating missing buckets in spansA/bucketsA as needed. // It returns the resulting spans/buckets (which must be used instead of the original spansA/bucketsA, @@ -1179,3 +1106,12 @@ func floatBucketsMatch(b1, b2 []float64) bool { } return true } + +// ReduceResolution reduces the float histogram's spans, buckets into target schema. +// The target schema must be smaller than the current float histogram's schema. 
+func (h *FloatHistogram) ReduceResolution(targetSchema int32) *FloatHistogram { + h.PositiveSpans, h.PositiveBuckets = reduceResolution(h.PositiveSpans, h.PositiveBuckets, h.Schema, targetSchema, false) + h.NegativeSpans, h.NegativeBuckets = reduceResolution(h.NegativeSpans, h.NegativeBuckets, h.Schema, targetSchema, false) + + return h +} diff --git a/model/histogram/generic.go b/model/histogram/generic.go index 7e4eb1ecb1..d42bb24151 100644 --- a/model/histogram/generic.go +++ b/model/histogram/generic.go @@ -600,3 +600,90 @@ var exponentialBounds = [][]float64{ 0.9892280131939752, 0.9919100824251095, 0.9945994234836328, 0.9972960560854698, }, } + +// reduceResolution reduces the input spans, buckets in origin schema to the spans, buckets in target schema. +// The target schema must be smaller than the original schema. +// Set deltaBuckets to true if the provided buckets are +// deltas. Set it to false if the buckets contain absolute counts. +func reduceResolution[IBC InternalBucketCount](originSpans []Span, originBuckets []IBC, originSchema, targetSchema int32, deltaBuckets bool) ([]Span, []IBC) { + var ( + targetSpans []Span // The spans in the target schema. + targetBuckets []IBC // The bucket counts in the target schema. + bucketIdx int32 // The index of bucket in the origin schema. + bucketCountIdx int // The position of a bucket in origin bucket count slice `originBuckets`. + targetBucketIdx int32 // The index of bucket in the target schema. + lastBucketCount IBC // The last visited bucket's count in the origin schema. + lastTargetBucketIdx int32 // The index of the last added target bucket. + lastTargetBucketCount IBC + ) + + for _, span := range originSpans { + // Determine the index of the first bucket in this span. + bucketIdx += span.Offset + for j := 0; j < int(span.Length); j++ { + // Determine the index of the bucket in the target schema from the index in the original schema. 
+ targetBucketIdx = targetIdx(bucketIdx, originSchema, targetSchema) + + switch { + case len(targetSpans) == 0: + // This is the first span in the targetSpans. + span := Span{ + Offset: targetBucketIdx, + Length: 1, + } + targetSpans = append(targetSpans, span) + targetBuckets = append(targetBuckets, originBuckets[bucketCountIdx]) + lastTargetBucketIdx = targetBucketIdx + lastBucketCount = originBuckets[bucketCountIdx] + lastTargetBucketCount = originBuckets[bucketCountIdx] + + case lastTargetBucketIdx == targetBucketIdx: + // The current bucket has to be merged into the same target bucket as the previous bucket. + if deltaBuckets { + lastBucketCount += originBuckets[bucketCountIdx] + targetBuckets[len(targetBuckets)-1] += lastBucketCount + lastTargetBucketCount += lastBucketCount + } else { + targetBuckets[len(targetBuckets)-1] += originBuckets[bucketCountIdx] + } + + case (lastTargetBucketIdx + 1) == targetBucketIdx: + // The current bucket has to go into a new target bucket, + // and that bucket is next to the previous target bucket, + // so we add it to the current target span. + targetSpans[len(targetSpans)-1].Length++ + lastTargetBucketIdx++ + if deltaBuckets { + lastBucketCount += originBuckets[bucketCountIdx] + targetBuckets = append(targetBuckets, lastBucketCount-lastTargetBucketCount) + lastTargetBucketCount = lastBucketCount + } else { + targetBuckets = append(targetBuckets, originBuckets[bucketCountIdx]) + } + + case (lastTargetBucketIdx + 1) < targetBucketIdx: + // The current bucket has to go into a new target bucket, + // and that bucket is separated by a gap from the previous target bucket, + // so we need to add a new target span. 
+ span := Span{ + Offset: targetBucketIdx - lastTargetBucketIdx - 1, + Length: 1, + } + targetSpans = append(targetSpans, span) + lastTargetBucketIdx = targetBucketIdx + if deltaBuckets { + lastBucketCount += originBuckets[bucketCountIdx] + targetBuckets = append(targetBuckets, lastBucketCount-lastTargetBucketCount) + lastTargetBucketCount = lastBucketCount + } else { + targetBuckets = append(targetBuckets, originBuckets[bucketCountIdx]) + } + } + + bucketIdx++ + bucketCountIdx++ + } + } + + return targetSpans, targetBuckets +} diff --git a/model/histogram/generic_test.go b/model/histogram/generic_test.go index 55015c047f..d24910d214 100644 --- a/model/histogram/generic_test.go +++ b/model/histogram/generic_test.go @@ -110,3 +110,73 @@ func TestGetBound(t *testing.T) { } } } + +func TestReduceResolutionHistogram(t *testing.T) { + cases := []struct { + spans []Span + buckets []int64 + schema int32 + targetSchema int32 + expectedSpans []Span + expectedBuckets []int64 + }{ + { + spans: []Span{ + {Offset: 0, Length: 4}, + {Offset: 0, Length: 0}, + {Offset: 3, Length: 2}, + }, + buckets: []int64{1, 2, -2, 1, -1, 0}, + schema: 0, + targetSchema: -1, + expectedSpans: []Span{ + {Offset: 0, Length: 3}, + {Offset: 1, Length: 1}, + }, + expectedBuckets: []int64{1, 3, -2, 0}, + // schema 0, base 2 { (0.5, 1]:1 (1,2]:3, (2,4]:1, (4,8]:2, (8,16]:0, (16,32]:0, (32,64]:0, (64,128]:1, (128,256]:1}", + // schema 1, base 4 { (0.25, 1):1 (1,4]:4, (4,16]:2, (16,64]:0, (64,256]:2} + }, + } + + for _, tc := range cases { + spans, buckets := reduceResolution(tc.spans, tc.buckets, tc.schema, tc.targetSchema, true) + require.Equal(t, tc.expectedSpans, spans) + require.Equal(t, tc.expectedBuckets, buckets) + } +} + +func TestReduceResolutionFloatHistogram(t *testing.T) { + cases := []struct { + spans []Span + buckets []float64 + schema int32 + targetSchema int32 + expectedSpans []Span + expectedBuckets []float64 + }{ + { + spans: []Span{ + {Offset: 0, Length: 4}, + {Offset: 0, Length: 0}, + 
{Offset: 3, Length: 2}, + }, + buckets: []float64{1, 3, 1, 2, 1, 1}, + schema: 0, + targetSchema: -1, + expectedSpans: []Span{ + {Offset: 0, Length: 3}, + {Offset: 1, Length: 1}, + }, + expectedBuckets: []float64{1, 4, 2, 2}, + // schema 0, base 2 { (0.5, 1]:1 (1,2]:3, (2,4]:1, (4,8]:2, (8,16]:0, (16,32]:0, (32,64]:0, (64,128]:1, (128,256]:1}", + // schema 1, base 4 { (0.25, 1):1 (1,4]:4, (4,16]:2, (16,64]:0, (64,256]:2} + }, + } + + for _, tc := range cases { + spans, buckets := reduceResolution(tc.spans, tc.buckets, tc.schema, tc.targetSchema, false) + require.Equal(t, tc.expectedSpans, spans) + require.Equal(t, tc.expectedBuckets, buckets) + } +} diff --git a/model/histogram/histogram.go b/model/histogram/histogram.go index 30c23e5e79..4699bd3cbe 100644 --- a/model/histogram/histogram.go +++ b/model/histogram/histogram.go @@ -493,3 +493,15 @@ func (c *cumulativeBucketIterator) At() Bucket[uint64] { Index: c.currIdx - 1, } } + +// ReduceResolution reduces the histogram's spans, buckets into target schema. +// The target schema must be smaller than the current histogram's schema. +func (h *Histogram) ReduceResolution(targetSchema int32) *Histogram { + h.PositiveSpans, h.PositiveBuckets = reduceResolution( + h.PositiveSpans, h.PositiveBuckets, h.Schema, targetSchema, true, + ) + h.NegativeSpans, h.NegativeBuckets = reduceResolution( + h.NegativeSpans, h.NegativeBuckets, h.Schema, targetSchema, true, + ) + return h +} From 443867f1aa07ca76c95ed5e7003edc18e73971c9 Mon Sep 17 00:00:00 2001 From: songjiayang Date: Thu, 9 Nov 2023 00:42:50 +0800 Subject: [PATCH 24/66] symbolCacheEntry field type alignment, thus saving 8 bytes. 
Signed-off-by: songjiayang --- tsdb/index/index.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 893167c250..0680906034 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -107,8 +107,8 @@ func newCRC32() hash.Hash32 { type symbolCacheEntry struct { index uint32 - lastValue string lastValueIndex uint32 + lastValue string } // Writer implements the IndexWriter interface for the standard @@ -457,8 +457,8 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... } w.symbolCache[l.Name] = symbolCacheEntry{ index: nameIndex, - lastValue: l.Value, lastValueIndex: valueIndex, + lastValue: l.Value, } } w.buf2.PutUvarint32(valueIndex) From fb48a351f0b274344eb92b1994d6302e4488f83d Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Wed, 8 Nov 2023 21:45:14 +0100 Subject: [PATCH 25/66] tsdb/wlog: use Go standard errors package Signed-off-by: Matthieu MOREL --- tsdb/wlog/checkpoint.go | 46 ++++++++++++++++++------------------ tsdb/wlog/checkpoint_test.go | 3 +-- tsdb/wlog/live_reader.go | 10 ++++---- tsdb/wlog/reader.go | 17 ++++++------- tsdb/wlog/wlog.go | 40 +++++++++++++++---------------- 5 files changed, 57 insertions(+), 59 deletions(-) diff --git a/tsdb/wlog/checkpoint.go b/tsdb/wlog/checkpoint.go index d64599c276..3d5b56da27 100644 --- a/tsdb/wlog/checkpoint.go +++ b/tsdb/wlog/checkpoint.go @@ -15,6 +15,7 @@ package wlog import ( + "errors" "fmt" "io" "math" @@ -25,7 +26,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/pkg/errors" "golang.org/x/exp/slices" "github.com/prometheus/prometheus/tsdb/chunks" @@ -102,8 +102,8 @@ func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.Head { var sgmRange []SegmentRange dir, idx, err := LastCheckpoint(w.Dir()) - if err != nil && err != record.ErrNotFound { - return nil, errors.Wrap(err, "find last checkpoint") + if err != nil && !errors.Is(err, record.ErrNotFound) { + 
return nil, fmt.Errorf("find last checkpoint: %w", err) } last := idx + 1 if err == nil { @@ -119,7 +119,7 @@ func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.Head sgmRange = append(sgmRange, SegmentRange{Dir: w.Dir(), First: from, Last: to}) sgmReader, err = NewSegmentsRangeReader(sgmRange...) if err != nil { - return nil, errors.Wrap(err, "create segment reader") + return nil, fmt.Errorf("create segment reader: %w", err) } defer sgmReader.Close() } @@ -128,15 +128,15 @@ func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.Head cpdirtmp := cpdir + ".tmp" if err := os.RemoveAll(cpdirtmp); err != nil { - return nil, errors.Wrap(err, "remove previous temporary checkpoint dir") + return nil, fmt.Errorf("remove previous temporary checkpoint dir: %w", err) } if err := os.MkdirAll(cpdirtmp, 0o777); err != nil { - return nil, errors.Wrap(err, "create checkpoint dir") + return nil, fmt.Errorf("create checkpoint dir: %w", err) } cp, err := New(nil, nil, cpdirtmp, w.CompressionType()) if err != nil { - return nil, errors.Wrap(err, "open checkpoint") + return nil, fmt.Errorf("open checkpoint: %w", err) } // Ensures that an early return caused by an error doesn't leave any tmp files. @@ -174,7 +174,7 @@ func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.Head case record.Series: series, err = dec.Series(rec, series) if err != nil { - return nil, errors.Wrap(err, "decode series") + return nil, fmt.Errorf("decode series: %w", err) } // Drop irrelevant series in place. repl := series[:0] @@ -192,7 +192,7 @@ func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.Head case record.Samples: samples, err = dec.Samples(rec, samples) if err != nil { - return nil, errors.Wrap(err, "decode samples") + return nil, fmt.Errorf("decode samples: %w", err) } // Drop irrelevant samples in place. 
repl := samples[:0] @@ -210,7 +210,7 @@ func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.Head case record.HistogramSamples: histogramSamples, err = dec.HistogramSamples(rec, histogramSamples) if err != nil { - return nil, errors.Wrap(err, "decode histogram samples") + return nil, fmt.Errorf("decode histogram samples: %w", err) } // Drop irrelevant histogramSamples in place. repl := histogramSamples[:0] @@ -228,7 +228,7 @@ func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.Head case record.Tombstones: tstones, err = dec.Tombstones(rec, tstones) if err != nil { - return nil, errors.Wrap(err, "decode deletes") + return nil, fmt.Errorf("decode deletes: %w", err) } // Drop irrelevant tombstones in place. repl := tstones[:0] @@ -249,7 +249,7 @@ func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.Head case record.Exemplars: exemplars, err = dec.Exemplars(rec, exemplars) if err != nil { - return nil, errors.Wrap(err, "decode exemplars") + return nil, fmt.Errorf("decode exemplars: %w", err) } // Drop irrelevant exemplars in place. repl := exemplars[:0] @@ -266,7 +266,7 @@ func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.Head case record.Metadata: metadata, err := dec.Metadata(rec, metadata) if err != nil { - return nil, errors.Wrap(err, "decode metadata") + return nil, fmt.Errorf("decode metadata: %w", err) } // Only keep reference to the latest found metadata for each refID. repl := 0 @@ -292,7 +292,7 @@ func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.Head // Flush records in 1 MB increments. 
if len(buf) > 1*1024*1024 { if err := cp.Log(recs...); err != nil { - return nil, errors.Wrap(err, "flush records") + return nil, fmt.Errorf("flush records: %w", err) } buf, recs = buf[:0], recs[:0] } @@ -300,12 +300,12 @@ func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.Head // If we hit any corruption during checkpointing, repairing is not an option. // The head won't know which series records are lost. if r.Err() != nil { - return nil, errors.Wrap(r.Err(), "read segments") + return nil, fmt.Errorf("read segments: %w", r.Err()) } // Flush remaining records. if err := cp.Log(recs...); err != nil { - return nil, errors.Wrap(err, "flush records") + return nil, fmt.Errorf("flush records: %w", err) } // Flush latest metadata records for each series. @@ -315,29 +315,29 @@ func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.Head latestMetadata = append(latestMetadata, m) } if err := cp.Log(enc.Metadata(latestMetadata, buf[:0])); err != nil { - return nil, errors.Wrap(err, "flush metadata records") + return nil, fmt.Errorf("flush metadata records: %w", err) } } if err := cp.Close(); err != nil { - return nil, errors.Wrap(err, "close checkpoint") + return nil, fmt.Errorf("close checkpoint: %w", err) } // Sync temporary directory before rename. 
df, err := fileutil.OpenDir(cpdirtmp) if err != nil { - return nil, errors.Wrap(err, "open temporary checkpoint directory") + return nil, fmt.Errorf("open temporary checkpoint directory: %w", err) } if err := df.Sync(); err != nil { df.Close() - return nil, errors.Wrap(err, "sync temporary checkpoint directory") + return nil, fmt.Errorf("sync temporary checkpoint directory: %w", err) } if err = df.Close(); err != nil { - return nil, errors.Wrap(err, "close temporary checkpoint directory") + return nil, fmt.Errorf("close temporary checkpoint directory: %w", err) } if err := fileutil.Replace(cpdirtmp, cpdir); err != nil { - return nil, errors.Wrap(err, "rename checkpoint directory") + return nil, fmt.Errorf("rename checkpoint directory: %w", err) } return stats, nil @@ -364,7 +364,7 @@ func listCheckpoints(dir string) (refs []checkpointRef, err error) { continue } if !fi.IsDir() { - return nil, errors.Errorf("checkpoint %s is not a directory", fi.Name()) + return nil, fmt.Errorf("checkpoint %s is not a directory", fi.Name()) } idx, err := strconv.Atoi(fi.Name()[len(checkpointPrefix):]) if err != nil { diff --git a/tsdb/wlog/checkpoint_test.go b/tsdb/wlog/checkpoint_test.go index 704a65cc15..381e091861 100644 --- a/tsdb/wlog/checkpoint_test.go +++ b/tsdb/wlog/checkpoint_test.go @@ -23,7 +23,6 @@ import ( "testing" "github.com/go-kit/log" - "github.com/pkg/errors" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" @@ -325,7 +324,7 @@ func TestCheckpointNoTmpFolderAfterError(t *testing.T) { // Walk the wlog dir to make sure there are no tmp folder left behind after the error. 
err = filepath.Walk(w.Dir(), func(path string, info os.FileInfo, err error) error { if err != nil { - return errors.Wrapf(err, "access err %q: %v", path, err) + return fmt.Errorf("access err %q: %w", path, err) } if info.IsDir() && strings.HasSuffix(info.Name(), ".tmp") { return fmt.Errorf("wlog dir contains temporary folder:%s", info.Name()) diff --git a/tsdb/wlog/live_reader.go b/tsdb/wlog/live_reader.go index a440eedf79..905bbf00d6 100644 --- a/tsdb/wlog/live_reader.go +++ b/tsdb/wlog/live_reader.go @@ -16,6 +16,7 @@ package wlog import ( "encoding/binary" + "errors" "fmt" "hash/crc32" "io" @@ -24,7 +25,6 @@ import ( "github.com/go-kit/log/level" "github.com/golang/snappy" "github.com/klauspost/compress/zstd" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" ) @@ -135,7 +135,7 @@ func (r *LiveReader) Next() bool { switch ok, err := r.buildRecord(); { case ok: return true - case err != nil && err != io.EOF: + case err != nil && !errors.Is(err, io.EOF): r.err = err return false } @@ -157,7 +157,7 @@ func (r *LiveReader) Next() bool { if r.writeIndex != pageSize { n, err := r.fillBuffer() - if n == 0 || (err != nil && err != io.EOF) { + if n == 0 || (err != nil && !errors.Is(err, io.EOF)) { r.err = err return false } @@ -265,7 +265,7 @@ func validateRecord(typ recType, i int) error { } return nil default: - return errors.Errorf("unexpected record type %d", typ) + return fmt.Errorf("unexpected record type %d", typ) } } @@ -322,7 +322,7 @@ func (r *LiveReader) readRecord() ([]byte, int, error) { rec := r.buf[r.readIndex+recordHeaderSize : r.readIndex+recordHeaderSize+length] if c := crc32.Checksum(rec, castagnoliTable); c != crc { - return nil, 0, errors.Errorf("unexpected checksum %x, expected %x", c, crc) + return nil, 0, fmt.Errorf("unexpected checksum %x, expected %x", c, crc) } return rec, length + recordHeaderSize, nil diff --git a/tsdb/wlog/reader.go b/tsdb/wlog/reader.go index f77b03b8ea..a744b0cc4b 100644 --- a/tsdb/wlog/reader.go 
+++ b/tsdb/wlog/reader.go @@ -16,12 +16,13 @@ package wlog import ( "encoding/binary" + "errors" + "fmt" "hash/crc32" "io" "github.com/golang/snappy" "github.com/klauspost/compress/zstd" - "github.com/pkg/errors" ) // Reader reads WAL records from an io.Reader. @@ -47,7 +48,7 @@ func NewReader(r io.Reader) *Reader { // It must not be called again after it returned false. func (r *Reader) Next() bool { err := r.next() - if errors.Is(err, io.EOF) { + if err != nil && errors.Is(err, io.EOF) { // The last WAL segment record shouldn't be torn(should be full or last). // The last record would be torn after a crash just before // the last record part could be persisted to disk. @@ -72,7 +73,7 @@ func (r *Reader) next() (err error) { i := 0 for { if _, err = io.ReadFull(r.rdr, hdr[:1]); err != nil { - return errors.Wrap(err, "read first header byte") + return fmt.Errorf("read first header byte: %w", err) } r.total++ r.curRecTyp = recTypeFromHeader(hdr[0]) @@ -95,7 +96,7 @@ func (r *Reader) next() (err error) { } n, err := io.ReadFull(r.rdr, buf[:k]) if err != nil { - return errors.Wrap(err, "read remaining zeros") + return fmt.Errorf("read remaining zeros: %w", err) } r.total += int64(n) @@ -108,7 +109,7 @@ func (r *Reader) next() (err error) { } n, err := io.ReadFull(r.rdr, hdr[1:]) if err != nil { - return errors.Wrap(err, "read remaining header") + return fmt.Errorf("read remaining header: %w", err) } r.total += int64(n) @@ -118,7 +119,7 @@ func (r *Reader) next() (err error) { ) if length > pageSize-recordHeaderSize { - return errors.Errorf("invalid record size %d", length) + return fmt.Errorf("invalid record size %d", length) } n, err = io.ReadFull(r.rdr, buf[:length]) if err != nil { @@ -127,10 +128,10 @@ func (r *Reader) next() (err error) { r.total += int64(n) if n != int(length) { - return errors.Errorf("invalid size: expected %d, got %d", length, n) + return fmt.Errorf("invalid size: expected %d, got %d", length, n) } if c := crc32.Checksum(buf[:length], 
castagnoliTable); c != crc { - return errors.Errorf("unexpected checksum %x, expected %x", c, crc) + return fmt.Errorf("unexpected checksum %x, expected %x", c, crc) } if isSnappyCompressed || isZstdCompressed { diff --git a/tsdb/wlog/wlog.go b/tsdb/wlog/wlog.go index 16924d2497..c4305bcbf1 100644 --- a/tsdb/wlog/wlog.go +++ b/tsdb/wlog/wlog.go @@ -17,6 +17,7 @@ package wlog import ( "bufio" "encoding/binary" + "errors" "fmt" "hash/crc32" "io" @@ -30,7 +31,6 @@ import ( "github.com/go-kit/log/level" "github.com/golang/snappy" "github.com/klauspost/compress/zstd" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "golang.org/x/exp/slices" @@ -137,7 +137,7 @@ func OpenWriteSegment(logger log.Logger, dir string, k int) (*Segment, error) { level.Warn(logger).Log("msg", "Last page of the wlog is torn, filling it with zeros", "segment", segName) if _, err := f.Write(make([]byte, pageSize-d)); err != nil { f.Close() - return nil, errors.Wrap(err, "zero-pad torn page") + return nil, fmt.Errorf("zero-pad torn page: %w", err) } } return &Segment{SegmentFile: f, i: k, dir: dir}, nil @@ -298,7 +298,7 @@ func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSi return nil, errors.New("invalid segment size") } if err := os.MkdirAll(dir, 0o777); err != nil { - return nil, errors.Wrap(err, "create dir") + return nil, fmt.Errorf("create dir: %w", err) } if logger == nil { logger = log.NewNopLogger() @@ -331,7 +331,7 @@ func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSi _, last, err := Segments(w.Dir()) if err != nil { - return nil, errors.Wrap(err, "get segment range") + return nil, fmt.Errorf("get segment range: %w", err) } // Index of the Segment we want to open and write to. @@ -414,11 +414,9 @@ func (w *WL) Repair(origErr error) error { // But that's not generally applicable if the records have any kind of causality. 
// Maybe as an extra mode in the future if mid-WAL corruptions become // a frequent concern. - err := errors.Cause(origErr) // So that we can pick up errors even if wrapped. - - cerr, ok := err.(*CorruptionErr) - if !ok { - return errors.Wrap(origErr, "cannot handle error") + var cerr *CorruptionErr + if !errors.As(origErr, &cerr) { + return fmt.Errorf("cannot handle error: %w", origErr) } if cerr.Segment < 0 { return errors.New("corruption error does not specify position") @@ -429,7 +427,7 @@ func (w *WL) Repair(origErr error) error { // All segments behind the corruption can no longer be used. segs, err := listSegments(w.Dir()) if err != nil { - return errors.Wrap(err, "list segments") + return fmt.Errorf("list segments: %w", err) } level.Warn(w.logger).Log("msg", "Deleting all segments newer than corrupted segment", "segment", cerr.Segment) @@ -440,14 +438,14 @@ func (w *WL) Repair(origErr error) error { // as we set the current segment to repaired file // below. if err := w.segment.Close(); err != nil { - return errors.Wrap(err, "close active segment") + return fmt.Errorf("close active segment: %w", err) } } if s.index <= cerr.Segment { continue } if err := os.Remove(filepath.Join(w.Dir(), s.name)); err != nil { - return errors.Wrapf(err, "delete segment:%v", s.index) + return fmt.Errorf("delete segment:%v: %w", s.index, err) } } // Regardless of the corruption offset, no record reaches into the previous segment. @@ -472,7 +470,7 @@ func (w *WL) Repair(origErr error) error { f, err := os.Open(tmpfn) if err != nil { - return errors.Wrap(err, "open segment") + return fmt.Errorf("open segment: %w", err) } defer f.Close() @@ -484,24 +482,24 @@ func (w *WL) Repair(origErr error) error { break } if err := w.Log(r.Record()); err != nil { - return errors.Wrap(err, "insert record") + return fmt.Errorf("insert record: %w", err) } } // We expect an error here from r.Err(), so nothing to handle. 
// We need to pad to the end of the last page in the repaired segment if err := w.flushPage(true); err != nil { - return errors.Wrap(err, "flush page in repair") + return fmt.Errorf("flush page in repair: %w", err) } // We explicitly close even when there is a defer for Windows to be // able to delete it. The defer is in place to close it in-case there // are errors above. if err := f.Close(); err != nil { - return errors.Wrap(err, "close corrupted file") + return fmt.Errorf("close corrupted file: %w", err) } if err := os.Remove(tmpfn); err != nil { - return errors.Wrap(err, "delete corrupted segment") + return fmt.Errorf("delete corrupted segment: %w", err) } // Explicitly close the segment we just repaired to avoid issues with Windows. @@ -553,7 +551,7 @@ func (w *WL) nextSegment(async bool) (int, error) { } next, err := CreateSegment(w.Dir(), w.segment.Index()+1) if err != nil { - return 0, errors.Wrap(err, "create new segment file") + return 0, fmt.Errorf("create new segment file: %w", err) } prev := w.segment if err := w.setSegment(next); err != nil { @@ -940,7 +938,7 @@ func NewSegmentsRangeReader(sr ...SegmentRange) (io.ReadCloser, error) { for _, sgmRange := range sr { refs, err := listSegments(sgmRange.Dir) if err != nil { - return nil, errors.Wrapf(err, "list segment in dir:%v", sgmRange.Dir) + return nil, fmt.Errorf("list segment in dir:%v: %w", sgmRange.Dir, err) } for _, r := range refs { @@ -952,7 +950,7 @@ func NewSegmentsRangeReader(sr ...SegmentRange) (io.ReadCloser, error) { } s, err := OpenReadSegment(filepath.Join(sgmRange.Dir, r.name)) if err != nil { - return nil, errors.Wrapf(err, "open segment:%v in dir:%v", r.name, sgmRange.Dir) + return nil, fmt.Errorf("open segment:%v in dir:%v: %w", r.name, sgmRange.Dir, err) } segs = append(segs, s) } @@ -1017,7 +1015,7 @@ func (r *segmentBufReader) Read(b []byte) (n int, err error) { r.off += n // If we succeeded, or hit a non-EOF, we can stop. 
- if err == nil || err != io.EOF { + if err == nil || !errors.Is(err, io.EOF) { return n, err } From a32fbc3658016a92834417695e80f511ed576ce1 Mon Sep 17 00:00:00 2001 From: machine424 Date: Wed, 8 Nov 2023 14:19:12 +0100 Subject: [PATCH 26/66] head.go: Remove an unneeded snapshot trigger that was moved in https://github.com/prometheus/prometheus/pull/9328 and brougt back by mistake in 095f572d4a855fa5c3492fd98c0459abbff91a07 as part of https://github.com/prometheus/prometheus/pull/11447 Signed-off-by: machine424 --- tsdb/head.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/tsdb/head.go b/tsdb/head.go index ee0ffcb8dc..d096bc6312 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -1593,9 +1593,6 @@ func (h *Head) Close() error { h.mmapHeadChunks() errs := tsdb_errors.NewMulti(h.chunkDiskMapper.Close()) - if errs.Err() == nil && h.opts.EnableMemorySnapshotOnShutdown { - errs.Add(h.performChunkSnapshot()) - } if h.wal != nil { errs.Add(h.wal.Close()) } From 071d5732afdb68fce428cb985ea13e176b9a876e Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 27 Oct 2023 21:41:04 +0100 Subject: [PATCH 27/66] TSDB: refactor cleanup of chunks and series Extract the middle of the loop into a function, so it will be easier to modify the `seriesHashmap` data structure. Signed-off-by: Bryan Boreham --- tsdb/head.go | 119 ++++++++++++++++++++++++++------------------------- 1 file changed, 61 insertions(+), 58 deletions(-) diff --git a/tsdb/head.go b/tsdb/head.go index ee0ffcb8dc..c46ffe0619 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -1777,70 +1777,73 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) ( deletedFromPrevStripe = 0 ) minMmapFile = math.MaxInt32 - // Run through all series and truncate old chunks. Mark those with no - // chunks left as deleted and store their ID. + + // For one series, truncate old chunks and check if any chunks left. If not, mark as deleted and collect the ID. 
+ check := func(i int, hash uint64, series *memSeries, deletedForCallback map[chunks.HeadSeriesRef]labels.Labels) { + series.Lock() + defer series.Unlock() + + rmChunks += series.truncateChunksBefore(mint, minOOOMmapRef) + + if len(series.mmappedChunks) > 0 { + seq, _ := series.mmappedChunks[0].ref.Unpack() + if seq < minMmapFile { + minMmapFile = seq + } + } + if series.ooo != nil && len(series.ooo.oooMmappedChunks) > 0 { + seq, _ := series.ooo.oooMmappedChunks[0].ref.Unpack() + if seq < minMmapFile { + minMmapFile = seq + } + for _, ch := range series.ooo.oooMmappedChunks { + if ch.minTime < minOOOTime { + minOOOTime = ch.minTime + } + } + } + if series.ooo != nil && series.ooo.oooHeadChunk != nil { + if series.ooo.oooHeadChunk.minTime < minOOOTime { + minOOOTime = series.ooo.oooHeadChunk.minTime + } + } + if len(series.mmappedChunks) > 0 || series.headChunks != nil || series.pendingCommit || + (series.ooo != nil && (len(series.ooo.oooMmappedChunks) > 0 || series.ooo.oooHeadChunk != nil)) { + seriesMint := series.minTime() + if seriesMint < actualMint { + actualMint = seriesMint + } + return + } + // The series is gone entirely. We need to keep the series lock + // and make sure we have acquired the stripe locks for hash and ID of the + // series alike. + // If we don't hold them all, there's a very small chance that a series receives + // samples again while we are half-way into deleting it. + j := int(series.ref) & (s.size - 1) + + if i != j { + s.locks[j].Lock() + } + + deleted[storage.SeriesRef(series.ref)] = struct{}{} + s.hashes[i].del(hash, series.lset) + delete(s.series[j], series.ref) + deletedForCallback[series.ref] = series.lset + + if i != j { + s.locks[j].Unlock() + } + } + + // Run through all series shard by shard, checking which should be deleted. 
for i := 0; i < s.size; i++ { deletedForCallback := make(map[chunks.HeadSeriesRef]labels.Labels, deletedFromPrevStripe) s.locks[i].Lock() for hash, all := range s.hashes[i] { for _, series := range all { - series.Lock() - rmChunks += series.truncateChunksBefore(mint, minOOOMmapRef) - - if len(series.mmappedChunks) > 0 { - seq, _ := series.mmappedChunks[0].ref.Unpack() - if seq < minMmapFile { - minMmapFile = seq - } - } - if series.ooo != nil && len(series.ooo.oooMmappedChunks) > 0 { - seq, _ := series.ooo.oooMmappedChunks[0].ref.Unpack() - if seq < minMmapFile { - minMmapFile = seq - } - for _, ch := range series.ooo.oooMmappedChunks { - if ch.minTime < minOOOTime { - minOOOTime = ch.minTime - } - } - } - if series.ooo != nil && series.ooo.oooHeadChunk != nil { - if series.ooo.oooHeadChunk.minTime < minOOOTime { - minOOOTime = series.ooo.oooHeadChunk.minTime - } - } - if len(series.mmappedChunks) > 0 || series.headChunks != nil || series.pendingCommit || - (series.ooo != nil && (len(series.ooo.oooMmappedChunks) > 0 || series.ooo.oooHeadChunk != nil)) { - seriesMint := series.minTime() - if seriesMint < actualMint { - actualMint = seriesMint - } - series.Unlock() - continue - } - - // The series is gone entirely. We need to keep the series lock - // and make sure we have acquired the stripe locks for hash and ID of the - // series alike. - // If we don't hold them all, there's a very small chance that a series receives - // samples again while we are half-way into deleting it. 
- j := int(series.ref) & (s.size - 1) - - if i != j { - s.locks[j].Lock() - } - - deleted[storage.SeriesRef(series.ref)] = struct{}{} - s.hashes[i].del(hash, series.lset) - delete(s.series[j], series.ref) - deletedForCallback[series.ref] = series.lset - - if i != j { - s.locks[j].Unlock() - } - - series.Unlock() + check(i, hash, series, deletedForCallback) } } From ce4e757704866ad16bd456e1140cdfe45b380601 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 27 Oct 2023 22:02:34 +0100 Subject: [PATCH 28/66] TSDB: refine variable naming in chunk gc Slight further refactor. Signed-off-by: Bryan Boreham --- tsdb/head.go | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/tsdb/head.go b/tsdb/head.go index c46ffe0619..575177a7ad 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -1779,7 +1779,7 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) ( minMmapFile = math.MaxInt32 // For one series, truncate old chunks and check if any chunks left. If not, mark as deleted and collect the ID. - check := func(i int, hash uint64, series *memSeries, deletedForCallback map[chunks.HeadSeriesRef]labels.Labels) { + check := func(hashShard int, hash uint64, series *memSeries, deletedForCallback map[chunks.HeadSeriesRef]labels.Labels) { series.Lock() defer series.Unlock() @@ -1820,20 +1820,16 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) ( // series alike. // If we don't hold them all, there's a very small chance that a series receives // samples again while we are half-way into deleting it. 
- j := int(series.ref) & (s.size - 1) - - if i != j { - s.locks[j].Lock() + refShard := int(series.ref) & (s.size - 1) + if hashShard != refShard { + s.locks[refShard].Lock() + defer s.locks[refShard].Unlock() } deleted[storage.SeriesRef(series.ref)] = struct{}{} - s.hashes[i].del(hash, series.lset) - delete(s.series[j], series.ref) + s.hashes[hashShard].del(hash, series.lset) + delete(s.series[refShard], series.ref) deletedForCallback[series.ref] = series.lset - - if i != j { - s.locks[j].Unlock() - } } // Run through all series shard by shard, checking which should be deleted. From e6c0f69f98f1125fdb9f4db8fc6f5d13629fad28 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 27 Oct 2023 22:05:30 +0100 Subject: [PATCH 29/66] TSDB: Only pay for hash collisions when they happen Instead of a map of slices of `*memSeries`, ready for any of them to hold series where hash values collide, split into a map of `*memSeries` and a map of slices which is usually empty, since hash collisions are a one-in-a-billion thing. The `del` method gets more complicated, to maintain the invariant that a series is only in one of the two maps. Signed-off-by: Bryan Boreham --- tsdb/head.go | 75 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/tsdb/head.go b/tsdb/head.go index 575177a7ad..410a226d8e 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -1667,26 +1667,34 @@ func (h *Head) mmapHeadChunks() { var count int for i := 0; i < h.series.size; i++ { h.series.locks[i].RLock() - for _, all := range h.series.hashes[i] { - for _, series := range all { - series.Lock() - count += series.mmapChunks(h.chunkDiskMapper) - series.Unlock() - } + for _, series := range h.series.series[i] { + series.Lock() + count += series.mmapChunks(h.chunkDiskMapper) + series.Unlock() } h.series.locks[i].RUnlock() } h.metrics.mmapChunksTotal.Add(float64(count)) } -// seriesHashmap is a simple hashmap for memSeries by their label set. 
It is built -// on top of a regular hashmap and holds a slice of series to resolve hash collisions. +// seriesHashmap lets TSDB find a memSeries by its label set, via a 64-bit hash. +// There is one map for the common case where the hash value is unique, and a +// second map for the case that two series have the same hash value. +// Each series is in only one of the maps. // Its methods require the hash to be submitted with it to avoid re-computations throughout // the code. -type seriesHashmap map[uint64][]*memSeries +type seriesHashmap struct { + unique map[uint64]*memSeries + conflicts map[uint64][]*memSeries +} -func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries { - for _, s := range m[hash] { +func (m *seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries { + if s, found := m.unique[hash]; found { + if labels.Equal(s.lset, lset) { + return s + } + } + for _, s := range m.conflicts[hash] { if labels.Equal(s.lset, lset) { return s } @@ -1695,27 +1703,46 @@ func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries { } func (m seriesHashmap) set(hash uint64, s *memSeries) { - l := m[hash] + if existing, found := m.unique[hash]; !found || labels.Equal(existing.lset, s.lset) { + m.unique[hash] = s + return + } + l := m.conflicts[hash] for i, prev := range l { if labels.Equal(prev.lset, s.lset) { l[i] = s return } } - m[hash] = append(l, s) + m.conflicts[hash] = append(l, s) } func (m seriesHashmap) del(hash uint64, lset labels.Labels) { var rem []*memSeries - for _, s := range m[hash] { - if !labels.Equal(s.lset, lset) { - rem = append(rem, s) + unique, found := m.unique[hash] + switch { + case !found: + return + case labels.Equal(unique.lset, lset): + conflicts := m.conflicts[hash] + if len(conflicts) == 0 { + delete(m.unique, hash) + return + } + rem = conflicts + default: + rem = append(rem, unique) + for _, s := range m.conflicts[hash] { + if !labels.Equal(s.lset, lset) { + rem = append(rem, s) + } } } - if len(rem) == 0 
{ - delete(m, hash) + m.unique[hash] = rem[0] + if len(rem) == 1 { + delete(m.conflicts, hash) } else { - m[hash] = rem + m.conflicts[hash] = rem[1:] } } @@ -1757,7 +1784,10 @@ func newStripeSeries(stripeSize int, seriesCallback SeriesLifecycleCallback) *st s.series[i] = map[chunks.HeadSeriesRef]*memSeries{} } for i := range s.hashes { - s.hashes[i] = seriesHashmap{} + s.hashes[i] = seriesHashmap{ + unique: map[uint64]*memSeries{}, + conflicts: map[uint64][]*memSeries{}, + } } return s } @@ -1837,7 +1867,10 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) ( deletedForCallback := make(map[chunks.HeadSeriesRef]labels.Labels, deletedFromPrevStripe) s.locks[i].Lock() - for hash, all := range s.hashes[i] { + for hash, series := range s.hashes[i].unique { + check(i, hash, series, deletedForCallback) + } + for hash, all := range s.hashes[i].conflicts { for _, series := range all { check(i, hash, series, deletedForCallback) } From 0996b78326fd8d41ba32e4670085599b5e63f542 Mon Sep 17 00:00:00 2001 From: machine424 Date: Thu, 9 Nov 2023 15:38:35 +0100 Subject: [PATCH 30/66] remote_write: add a unit test to make sure the write client sends the extra http headers as expected This will help letting prometheus off the hook from situations like https://github.com/prometheus/prometheus/issues/13030 Signed-off-by: machine424 --- storage/remote/client_test.go | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/storage/remote/client_test.go b/storage/remote/client_test.go index 33ae7e4686..2acb8e279a 100644 --- a/storage/remote/client_test.go +++ b/storage/remote/client_test.go @@ -168,3 +168,43 @@ func TestRetryAfterDuration(t *testing.T) { require.Equal(t, c.expected, retryAfterDuration(c.tInput), c.name) } } + +func TestClientHeaders(t *testing.T) { + headersToSend := map[string]string{"Foo": "Bar", "Baz": "qux"} + + var called bool + server := httptest.NewServer( + http.HandlerFunc(func(w http.ResponseWriter, r 
*http.Request) { + called = true + receivedHeaders := r.Header + for name, value := range headersToSend { + require.Equal( + t, + []string{value}, + receivedHeaders.Values(name), + "expected %v to be part of the received headers %v", + headersToSend, + receivedHeaders, + ) + } + }), + ) + defer server.Close() + + serverURL, err := url.Parse(server.URL) + require.NoError(t, err) + + conf := &ClientConfig{ + URL: &config_util.URL{URL: serverURL}, + Timeout: model.Duration(time.Second), + Headers: headersToSend, + } + + c, err := NewWriteClient("c", conf) + require.NoError(t, err) + + err = c.Store(context.Background(), []byte{}, 0) + require.NoError(t, err) + + require.True(t, called, "The remote server wasn't called") +} From 0fe34f6d788be5ede6bc0d705b80343e12298b9c Mon Sep 17 00:00:00 2001 From: Julien Pivotto Date: Thu, 9 Nov 2023 10:18:00 -0600 Subject: [PATCH 31/66] Follow-up to #13060: Add test to ensure staleness tracking This commit introduces an additional test in `scrape_test.go` to verify staleness tracking when `trackTimestampStaleness` is enabled. The new `TestScrapeLoopAppendStalenessIfTrackTimestampStaleness` function asserts that the scrape loop correctly appends staleness markers when necessary, reflecting the expected behavior with the feature flag turned on. The previous tests were only testing end of scrape staleness. 
Signed-off-by: Julien Pivotto --- scrape/scrape_test.go | 51 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index ccd651f49b..a2e0d00c6c 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -2038,6 +2038,57 @@ func TestScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T) { require.Equal(t, want, app.resultFloats, "Appended samples not as expected:\n%s", appender) } +func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { + app := &collectResultAppender{} + sl := newScrapeLoop(context.Background(), + nil, nil, nil, + nopMutator, + nopMutator, + func(ctx context.Context) storage.Appender { return app }, + nil, + 0, + true, + true, + 0, 0, + nil, + 0, + 0, + false, + false, + false, + nil, + false, + newTestScrapeMetrics(t), + ) + + now := time.Now() + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte("metric_a 1 1000\n"), "", now) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + slApp = sl.appender(context.Background()) + _, _, _, err = sl.append(slApp, []byte(""), "", now.Add(time.Second)) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) + + // DeepEqual will report NaNs as being different, so replace with a different value. 
+ app.resultFloats[1].f = 42 + want := []floatSample{ + { + metric: labels.FromStrings(model.MetricNameLabel, "metric_a"), + t: 1000, + f: 1, + }, + { + metric: labels.FromStrings(model.MetricNameLabel, "metric_a"), + t: timestamp.FromTime(now.Add(time.Second)), + f: 42, + }, + } + require.Equal(t, want, app.resultFloats, "Appended samples not as expected:\n%s", appender) +} + func TestScrapeLoopAppendExemplar(t *testing.T) { tests := []struct { title string From 2972cc5e8f655fbfb6a73e33465bfd0abb132835 Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Thu, 9 Nov 2023 22:07:35 +0100 Subject: [PATCH 32/66] tsdb/index: use Go standard errors package Signed-off-by: Matthieu MOREL --- tsdb/index/index.go | 117 ++++++++++++++++++------------------ tsdb/index/index_test.go | 6 +- tsdb/index/postings.go | 4 +- tsdb/index/postings_test.go | 2 +- 4 files changed, 66 insertions(+), 63 deletions(-) diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 893167c250..74212bced5 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -28,7 +28,6 @@ import ( "sort" "unsafe" - "github.com/pkg/errors" "golang.org/x/exp/slices" "github.com/prometheus/prometheus/model/labels" @@ -172,7 +171,7 @@ func NewTOCFromByteSlice(bs ByteSlice) (*TOC, error) { d := encoding.Decbuf{B: b[:len(b)-4]} if d.Crc32(castagnoliTable) != expCRC { - return nil, errors.Wrap(encoding.ErrInvalidChecksum, "read TOC") + return nil, fmt.Errorf("read TOC: %w", encoding.ErrInvalidChecksum) } toc := &TOC{ @@ -197,7 +196,7 @@ func NewWriter(ctx context.Context, fn string) (*Writer, error) { defer df.Close() // Close for platform windows. if err := os.RemoveAll(fn); err != nil { - return nil, errors.Wrap(err, "remove any existing index at path") + return nil, fmt.Errorf("remove any existing index at path: %w", err) } // Main index file we are building. 
@@ -216,7 +215,7 @@ func NewWriter(ctx context.Context, fn string) (*Writer, error) { return nil, err } if err := df.Sync(); err != nil { - return nil, errors.Wrap(err, "sync dir") + return nil, fmt.Errorf("sync dir: %w", err) } iw := &Writer{ @@ -288,7 +287,7 @@ func (fw *FileWriter) Write(bufs ...[]byte) error { // Once we move to compressed/varint representations in those areas, this limitation // can be lifted. if fw.pos > 16*math.MaxUint32 { - return errors.Errorf("%q exceeding max size of 64GiB", fw.name) + return fmt.Errorf("%q exceeding max size of 64GiB", fw.name) } } return nil @@ -315,7 +314,7 @@ func (fw *FileWriter) AddPadding(size int) error { p = uint64(size) - p if err := fw.Write(make([]byte, p)); err != nil { - return errors.Wrap(err, "add padding") + return fmt.Errorf("add padding: %w", err) } return nil } @@ -353,7 +352,7 @@ func (w *Writer) ensureStage(s indexWriterStage) error { } } if w.stage > s { - return errors.Errorf("invalid stage %q, currently at %q", s, w.stage) + return fmt.Errorf("invalid stage %q, currently at %q", s, w.stage) } // Mark start of sections in table of contents. @@ -417,20 +416,20 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... return err } if labels.Compare(lset, w.lastSeries) <= 0 { - return errors.Errorf("out-of-order series added with label set %q", lset) + return fmt.Errorf("out-of-order series added with label set %q", lset) } if ref < w.lastRef && !w.lastSeries.IsEmpty() { - return errors.Errorf("series with reference greater than %d already added", ref) + return fmt.Errorf("series with reference greater than %d already added", ref) } // We add padding to 16 bytes to increase the addressable space we get through 4 byte // series references. 
if err := w.addPadding(16); err != nil { - return errors.Errorf("failed to write padding bytes: %v", err) + return fmt.Errorf("failed to write padding bytes: %v", err) } if w.f.pos%16 != 0 { - return errors.Errorf("series write not 16-byte aligned at %d", w.f.pos) + return fmt.Errorf("series write not 16-byte aligned at %d", w.f.pos) } w.buf2.Reset() @@ -443,7 +442,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... if !ok { nameIndex, err = w.symbols.ReverseLookup(l.Name) if err != nil { - return errors.Errorf("symbol entry for %q does not exist, %v", l.Name, err) + return fmt.Errorf("symbol entry for %q does not exist, %v", l.Name, err) } } w.labelNames[l.Name]++ @@ -453,7 +452,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... if !ok || cacheEntry.lastValue != l.Value { valueIndex, err = w.symbols.ReverseLookup(l.Value) if err != nil { - return errors.Errorf("symbol entry for %q does not exist, %v", l.Value, err) + return fmt.Errorf("symbol entry for %q does not exist, %v", l.Value, err) } w.symbolCache[l.Name] = symbolCacheEntry{ index: nameIndex, @@ -493,7 +492,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... w.buf2.PutHash(w.crc32) if err := w.write(w.buf1.Get(), w.buf2.Get()); err != nil { - return errors.Wrap(err, "write series data") + return fmt.Errorf("write series data: %w", err) } w.lastSeries.CopyFrom(lset) @@ -514,7 +513,7 @@ func (w *Writer) AddSymbol(sym string) error { return err } if w.numSymbols != 0 && sym <= w.lastSymbol { - return errors.Errorf("symbol %q out-of-order", sym) + return fmt.Errorf("symbol %q out-of-order", sym) } w.lastSymbol = sym w.numSymbols++ @@ -527,7 +526,7 @@ func (w *Writer) finishSymbols() error { symbolTableSize := w.f.pos - w.toc.Symbols - 4 // The symbol table's part is 4 bytes. 
So the total symbol table size must be less than or equal to 2^32-1 if symbolTableSize > math.MaxUint32 { - return errors.Errorf("symbol table size exceeds %d bytes: %d", uint32(math.MaxUint32), symbolTableSize) + return fmt.Errorf("symbol table size exceeds %d bytes: %d", uint32(math.MaxUint32), symbolTableSize) } // Write out the length and symbol count. @@ -563,7 +562,7 @@ func (w *Writer) finishSymbols() error { // Load in the symbol table efficiently for the rest of the index writing. w.symbols, err = NewSymbols(realByteSlice(w.symbolFile.Bytes()), FormatV2, int(w.toc.Symbols)) if err != nil { - return errors.Wrap(err, "read symbols") + return fmt.Errorf("read symbols: %w", err) } return nil } @@ -660,7 +659,7 @@ func (w *Writer) writeLabelIndex(name string, values []uint32) error { w.buf1.Reset() l := w.f.pos - startPos - 4 if l > math.MaxUint32 { - return errors.Errorf("label index size exceeds 4 bytes: %d", l) + return fmt.Errorf("label index size exceeds 4 bytes: %d", l) } w.buf1.PutBE32int(int(l)) if err := w.writeAt(w.buf1.Get(), startPos); err != nil { @@ -704,7 +703,7 @@ func (w *Writer) writeLabelIndexesOffsetTable() error { w.buf1.Reset() l := w.f.pos - startPos - 4 if l > math.MaxUint32 { - return errors.Errorf("label indexes offset table size exceeds 4 bytes: %d", l) + return fmt.Errorf("label indexes offset table size exceeds 4 bytes: %d", l) } w.buf1.PutBE32int(int(l)) if err := w.writeAt(w.buf1.Get(), startPos); err != nil { @@ -785,7 +784,7 @@ func (w *Writer) writePostingsOffsetTable() error { w.buf1.Reset() l := w.f.pos - startPos - 4 if l > math.MaxUint32 { - return errors.Errorf("postings offset table size exceeds 4 bytes: %d", l) + return fmt.Errorf("postings offset table size exceeds 4 bytes: %d", l) } w.buf1.PutBE32int(int(l)) if err := w.writeAt(w.buf1.Get(), startPos); err != nil { @@ -839,7 +838,7 @@ func (w *Writer) writePostingsToTmpFiles() error { d.ConsumePadding() startPos := w.toc.LabelIndices - uint64(d.Len()) if startPos%16 != 
0 { - return errors.Errorf("series not 16-byte aligned at %d", startPos) + return fmt.Errorf("series not 16-byte aligned at %d", startPos) } offsets = append(offsets, uint32(startPos/16)) // Skip to next series. @@ -964,7 +963,7 @@ func (w *Writer) writePosting(name, value string, offs []uint32) error { for _, off := range offs { if off > (1<<32)-1 { - return errors.Errorf("series offset %d exceeds 4 bytes", off) + return fmt.Errorf("series offset %d exceeds 4 bytes", off) } w.buf1.PutBE32(off) } @@ -973,7 +972,7 @@ func (w *Writer) writePosting(name, value string, offs []uint32) error { l := w.buf1.Len() // We convert to uint to make code compile on 32-bit systems, as math.MaxUint32 doesn't fit into int there. if uint(l) > math.MaxUint32 { - return errors.Errorf("posting size exceeds 4 bytes: %d", l) + return fmt.Errorf("posting size exceeds 4 bytes: %d", l) } w.buf2.PutBE32int(l) w.buf1.PutHash(w.crc32) @@ -1000,7 +999,7 @@ func (w *Writer) writePostings() error { return err } if uint64(n) != w.fP.pos { - return errors.Errorf("wrote %d bytes to posting temporary file, but only read back %d", w.fP.pos, n) + return fmt.Errorf("wrote %d bytes to posting temporary file, but only read back %d", w.fP.pos, n) } w.f.pos += uint64(n) @@ -1135,26 +1134,26 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { // Verify header. 
if r.b.Len() < HeaderLen { - return nil, errors.Wrap(encoding.ErrInvalidSize, "index header") + return nil, fmt.Errorf("index header: %w", encoding.ErrInvalidSize) } if m := binary.BigEndian.Uint32(r.b.Range(0, 4)); m != MagicIndex { - return nil, errors.Errorf("invalid magic number %x", m) + return nil, fmt.Errorf("invalid magic number %x", m) } r.version = int(r.b.Range(4, 5)[0]) if r.version != FormatV1 && r.version != FormatV2 { - return nil, errors.Errorf("unknown index file version %d", r.version) + return nil, fmt.Errorf("unknown index file version %d", r.version) } var err error r.toc, err = NewTOCFromByteSlice(b) if err != nil { - return nil, errors.Wrap(err, "read TOC") + return nil, fmt.Errorf("read TOC: %w", err) } r.symbols, err = NewSymbols(r.b, r.version, int(r.toc.Symbols)) if err != nil { - return nil, errors.Wrap(err, "read symbols") + return nil, fmt.Errorf("read symbols: %w", err) } if r.version == FormatV1 { @@ -1169,7 +1168,7 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { r.postingsV1[string(name)][string(value)] = off return nil }); err != nil { - return nil, errors.Wrap(err, "read postings table") + return nil, fmt.Errorf("read postings table: %w", err) } } else { var lastName, lastValue []byte @@ -1197,7 +1196,7 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { valueCount++ return nil }); err != nil { - return nil, errors.Wrap(err, "read postings table") + return nil, fmt.Errorf("read postings table: %w", err) } if lastName != nil { r.postings[string(lastName)] = append(r.postings[string(lastName)], postingOffset{value: string(lastValue), off: lastOff}) @@ -1217,7 +1216,7 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { } off, err := r.symbols.ReverseLookup(k) if err != nil { - return nil, errors.Wrap(err, "reverse symbol lookup") + return nil, fmt.Errorf("reverse symbol lookup: %w", err) } r.nameSymbols[off] = k } @@ -1252,7 +1251,7 @@ func (r *Reader) PostingsRanges() (map[labels.Label]Range, 
error) { } return nil }); err != nil { - return nil, errors.Wrap(err, "read postings table") + return nil, fmt.Errorf("read postings table: %w", err) } return m, nil } @@ -1302,7 +1301,7 @@ func (s Symbols) Lookup(o uint32) (string, error) { if s.version == FormatV2 { if int(o) >= s.seen { - return "", errors.Errorf("unknown symbol offset %d", o) + return "", fmt.Errorf("unknown symbol offset %d", o) } d.Skip(s.offsets[int(o/symbolFactor)]) // Walk until we find the one we want. @@ -1321,7 +1320,7 @@ func (s Symbols) Lookup(o uint32) (string, error) { func (s Symbols) ReverseLookup(sym string) (uint32, error) { if len(s.offsets) == 0 { - return 0, errors.Errorf("unknown symbol %q - no symbols", sym) + return 0, fmt.Errorf("unknown symbol %q - no symbols", sym) } i := sort.Search(len(s.offsets), func(i int) bool { // Any decoding errors here will be lost, however @@ -1354,7 +1353,7 @@ func (s Symbols) ReverseLookup(sym string) (uint32, error) { return 0, d.Err() } if lastSymbol != sym { - return 0, errors.Errorf("unknown symbol %q", sym) + return 0, fmt.Errorf("unknown symbol %q", sym) } if s.version == FormatV2 { return uint32(res), nil @@ -1413,7 +1412,7 @@ func ReadPostingsOffsetTable(bs ByteSlice, off uint64, f func(name, value []byte offsetPos := startLen - d.Len() if keyCount := d.Uvarint(); keyCount != 2 { - return errors.Errorf("unexpected number of keys for postings offset table %d", keyCount) + return fmt.Errorf("unexpected number of keys for postings offset table %d", keyCount) } name := d.UvarintBytes() value := d.UvarintBytes() @@ -1468,7 +1467,7 @@ func (r *Reader) SortedLabelValues(ctx context.Context, name string, matchers .. // TODO(replay): Support filtering by matchers. 
func (r *Reader) LabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) { if len(matchers) > 0 { - return nil, errors.Errorf("matchers parameter is not implemented: %+v", matchers) + return nil, fmt.Errorf("matchers parameter is not implemented: %+v", matchers) } if r.version == FormatV1 { @@ -1516,7 +1515,7 @@ func (r *Reader) LabelValues(ctx context.Context, name string, matchers ...*labe d.Uvarint64() // Offset. } if d.Err() != nil { - return nil, errors.Wrap(d.Err(), "get postings offset entry") + return nil, fmt.Errorf("get postings offset entry: %w", d.Err()) } return values, ctx.Err() @@ -1542,12 +1541,12 @@ func (r *Reader) LabelNamesFor(ctx context.Context, ids ...storage.SeriesRef) ([ d := encoding.NewDecbufUvarintAt(r.b, int(offset), castagnoliTable) buf := d.Get() if d.Err() != nil { - return nil, errors.Wrap(d.Err(), "get buffer for series") + return nil, fmt.Errorf("get buffer for series: %w", d.Err()) } offsets, err := r.dec.LabelNamesOffsetsFor(buf) if err != nil { - return nil, errors.Wrap(err, "get label name offsets") + return nil, fmt.Errorf("get label name offsets: %w", err) } for _, off := range offsets { offsetsMap[off] = struct{}{} @@ -1559,7 +1558,7 @@ func (r *Reader) LabelNamesFor(ctx context.Context, ids ...storage.SeriesRef) ([ for off := range offsetsMap { name, err := r.lookupSymbol(ctx, off) if err != nil { - return nil, errors.Wrap(err, "lookup symbol in LabelNamesFor") + return nil, fmt.Errorf("lookup symbol in LabelNamesFor: %w", err) } names = append(names, name) } @@ -1580,7 +1579,7 @@ func (r *Reader) LabelValueFor(ctx context.Context, id storage.SeriesRef, label d := encoding.NewDecbufUvarintAt(r.b, int(offset), castagnoliTable) buf := d.Get() if d.Err() != nil { - return "", errors.Wrap(d.Err(), "label values for") + return "", fmt.Errorf("label values for: %w", d.Err()) } value, err := r.dec.LabelValueFor(ctx, buf, label) @@ -1607,7 +1606,11 @@ func (r *Reader) Series(id 
storage.SeriesRef, builder *labels.ScratchBuilder, ch if d.Err() != nil { return d.Err() } - return errors.Wrap(r.dec.Series(d.Get(), builder, chks), "read series") + err := r.dec.Series(d.Get(), builder, chks) + if err != nil { + return fmt.Errorf("read series: %w", err) + } + return nil } func (r *Reader) Postings(ctx context.Context, name string, values ...string) (Postings, error) { @@ -1626,7 +1629,7 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P d := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) _, p, err := r.dec.Postings(d.Get()) if err != nil { - return nil, errors.Wrap(err, "decode postings") + return nil, fmt.Errorf("decode postings: %w", err) } res = append(res, p) } @@ -1688,7 +1691,7 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P d2 := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) _, p, err := r.dec.Postings(d2.Get()) if err != nil { - return nil, errors.Wrap(err, "decode postings") + return nil, fmt.Errorf("decode postings: %w", err) } res = append(res, p) } @@ -1704,10 +1707,10 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P } } if d.Err() != nil { - return nil, errors.Wrap(d.Err(), "get postings offset entry") + return nil, fmt.Errorf("get postings offset entry: %w", d.Err()) } if ctx.Err() != nil { - return nil, errors.Wrap(ctx.Err(), "get postings offset entry") + return nil, fmt.Errorf("get postings offset entry: %w", ctx.Err()) } } @@ -1729,7 +1732,7 @@ func (r *Reader) Size() int64 { // TODO(twilkie) implement support for matchers. 
func (r *Reader) LabelNames(_ context.Context, matchers ...*labels.Matcher) ([]string, error) { if len(matchers) > 0 { - return nil, errors.Errorf("matchers parameter is not implemented: %+v", matchers) + return nil, fmt.Errorf("matchers parameter is not implemented: %+v", matchers) } labelNames := make([]string, 0, len(r.postings)) @@ -1800,7 +1803,7 @@ func (dec *Decoder) LabelNamesOffsetsFor(b []byte) ([]uint32, error) { _ = d.Uvarint() // skip the label value if d.Err() != nil { - return nil, errors.Wrap(d.Err(), "read series label offsets") + return nil, fmt.Errorf("read series label offsets: %w", d.Err()) } } @@ -1817,18 +1820,18 @@ func (dec *Decoder) LabelValueFor(ctx context.Context, b []byte, label string) ( lvo := uint32(d.Uvarint()) if d.Err() != nil { - return "", errors.Wrap(d.Err(), "read series label offsets") + return "", fmt.Errorf("read series label offsets: %w", d.Err()) } ln, err := dec.LookupSymbol(ctx, lno) if err != nil { - return "", errors.Wrap(err, "lookup label name") + return "", fmt.Errorf("lookup label name: %w", err) } if ln == label { lv, err := dec.LookupSymbol(ctx, lvo) if err != nil { - return "", errors.Wrap(err, "lookup label value") + return "", fmt.Errorf("lookup label value: %w", err) } return lv, nil @@ -1853,16 +1856,16 @@ func (dec *Decoder) Series(b []byte, builder *labels.ScratchBuilder, chks *[]chu lvo := uint32(d.Uvarint()) if d.Err() != nil { - return errors.Wrap(d.Err(), "read series label offsets") + return fmt.Errorf("read series label offsets: %w", d.Err()) } ln, err := dec.LookupSymbol(context.TODO(), lno) if err != nil { - return errors.Wrap(err, "lookup label name") + return fmt.Errorf("lookup label name: %w", err) } lv, err := dec.LookupSymbol(context.TODO(), lvo) if err != nil { - return errors.Wrap(err, "lookup label value") + return fmt.Errorf("lookup label value: %w", err) } builder.Add(ln, lv) @@ -1894,7 +1897,7 @@ func (dec *Decoder) Series(b []byte, builder *labels.ScratchBuilder, chks *[]chu t0 = maxt 
if d.Err() != nil { - return errors.Wrapf(d.Err(), "read meta for chunk %d", i) + return fmt.Errorf("read meta for chunk %d: %w", i, d.Err()) } *chks = append(*chks, chunks.Meta{ diff --git a/tsdb/index/index_test.go b/tsdb/index/index_test.go index 7a6683da2b..6c5e313d43 100644 --- a/tsdb/index/index_test.go +++ b/tsdb/index/index_test.go @@ -15,6 +15,7 @@ package index import ( "context" + "errors" "fmt" "hash/crc32" "math/rand" @@ -23,7 +24,6 @@ import ( "sort" "testing" - "github.com/pkg/errors" "github.com/stretchr/testify/require" "go.uber.org/goleak" @@ -66,7 +66,7 @@ func (m mockIndex) Symbols() (map[string]struct{}, error) { func (m mockIndex) AddSeries(ref storage.SeriesRef, l labels.Labels, chunks ...chunks.Meta) error { if _, ok := m.series[ref]; ok { - return errors.Errorf("series with reference %d already added", ref) + return fmt.Errorf("series with reference %d already added", ref) } l.Range(func(lbl labels.Label) { m.symbols[lbl.Name] = struct{}{} @@ -115,7 +115,7 @@ func (m mockIndex) Postings(ctx context.Context, name string, values ...string) func (m mockIndex) SortedPostings(p Postings) Postings { ep, err := ExpandPostings(p) if err != nil { - return ErrPostings(errors.Wrap(err, "expand postings")) + return ErrPostings(fmt.Errorf("expand postings: %w", err)) } sort.Slice(ep, func(i, j int) bool { diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index f79a8d4cfc..c839574276 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -17,12 +17,12 @@ import ( "container/heap" "context" "encoding/binary" + "fmt" "runtime" "sort" "strings" "sync" - "github.com/pkg/errors" "golang.org/x/exp/slices" "github.com/prometheus/prometheus/model/labels" @@ -927,7 +927,7 @@ func (h *postingsWithIndexHeap) next() error { } if err := pi.p.Err(); err != nil { - return errors.Wrapf(err, "postings %d", pi.index) + return fmt.Errorf("postings %d: %w", pi.index, err) } h.popIndex() return nil diff --git a/tsdb/index/postings_test.go 
b/tsdb/index/postings_test.go index 783b5f84fc..04282c332a 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -17,13 +17,13 @@ import ( "container/heap" "context" "encoding/binary" + "errors" "fmt" "math/rand" "sort" "strconv" "testing" - "github.com/pkg/errors" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" From 80d2f992ae791756de8e21cf25c2e1acbf99ca3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacob=20Baung=C3=A5rd=20Hansen?= Date: Fri, 10 Nov 2023 11:22:32 +0100 Subject: [PATCH 33/66] codemirror-promql: Add request header to client (#13118) With this commit we make it possible to adjust the request headers sent to Prometheus by the codemirror-promql extension. This enables customizing the headers sent, without re-implementing the Prometheus client completely. Signed-off-by: Jacob Baungard Hansen --- web/ui/module/codemirror-promql/README.md | 9 +++++++++ .../module/codemirror-promql/src/client/prometheus.ts | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/web/ui/module/codemirror-promql/README.md b/web/ui/module/codemirror-promql/README.md index 627e4fe15a..8fb188e869 100644 --- a/web/ui/module/codemirror-promql/README.md +++ b/web/ui/module/codemirror-promql/README.md @@ -161,6 +161,15 @@ You can change it to use the HTTP method `GET` if you prefer. 
const promQL = new PromQLExtension().setComplete({remote: {httpMethod: 'GET'}}) ``` +###### HTTP request headers + +If you need to send specific HTTP headers along with the requests to Prometheus, you can adjust that as follows: + +```typescript +const customHeaders = new Headers({'header-name': 'test-value'}); +const promql = new PromQLExtension().setComplete({remote: {requestHeaders: customHeaders}}) +``` + ###### Override the API Prefix The default Prometheus Client, when building the query to get data from Prometheus, is using an API prefix which is by diff --git a/web/ui/module/codemirror-promql/src/client/prometheus.ts b/web/ui/module/codemirror-promql/src/client/prometheus.ts index a9c7f74568..873cbb0d22 100644 --- a/web/ui/module/codemirror-promql/src/client/prometheus.ts +++ b/web/ui/module/codemirror-promql/src/client/prometheus.ts @@ -58,6 +58,7 @@ export interface PrometheusConfig { cache?: CacheConfig; httpMethod?: 'POST' | 'GET'; apiPrefix?: string; + requestHeaders?: Headers; } interface APIResponse { @@ -84,6 +85,7 @@ export class HTTPPrometheusClient implements PrometheusClient { // For some reason, just assigning via "= fetch" here does not end up executing fetch correctly // when calling it, thus the indirection via another function wrapper. private readonly fetchFn: FetchFn = (input: RequestInfo, init?: RequestInit): Promise => fetch(input, init); + private requestHeaders: Headers = new Headers(); constructor(config: PrometheusConfig) { this.url = config.url ? 
config.url : ''; @@ -100,6 +102,9 @@ export class HTTPPrometheusClient implements PrometheusClient { if (config.apiPrefix) { this.apiPrefix = config.apiPrefix; } + if (config.requestHeaders) { + this.requestHeaders = config.requestHeaders; + } } labelNames(metricName?: string): Promise { @@ -221,6 +226,11 @@ export class HTTPPrometheusClient implements PrometheusClient { } private fetchAPI(resource: string, init?: RequestInit): Promise { + if (init) { + init.headers = this.requestHeaders; + } else { + init = { headers: this.requestHeaders }; + } return this.fetchFn(this.url + resource, init) .then((res) => { if (!res.ok && ![badRequest, unprocessableEntity, serviceUnavailable].includes(res.status)) { From e250f09b5d34d6c936b18f3b7699df23a0555092 Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Fri, 10 Nov 2023 21:33:34 +0800 Subject: [PATCH 34/66] change origin schema in `ReduceResolution` method of histogram and float histogram (#13116) * change origin schema in ReduceResolution method of histogram and float histogram Signed-off-by: Ziqi Zhao --------- Signed-off-by: Ziqi Zhao --- model/histogram/float_histogram.go | 2 +- model/histogram/float_histogram_test.go | 43 +++++++++++++++++++++++++ model/histogram/histogram.go | 1 + model/histogram/histogram_test.go | 43 +++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 212b028800..e0f5d208e2 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -1112,6 +1112,6 @@ func floatBucketsMatch(b1, b2 []float64) bool { func (h *FloatHistogram) ReduceResolution(targetSchema int32) *FloatHistogram { h.PositiveSpans, h.PositiveBuckets = reduceResolution(h.PositiveSpans, h.PositiveBuckets, h.Schema, targetSchema, false) h.NegativeSpans, h.NegativeBuckets = reduceResolution(h.NegativeSpans, h.NegativeBuckets, h.Schema, targetSchema, false) - + h.Schema = targetSchema return h } diff 
--git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index 6f445c0cfa..bfe3525fa0 100644 --- a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -2442,3 +2442,46 @@ func createRandomSpans(rng *rand.Rand, spanNum int32) ([]Span, []float64) { } return Spans, Buckets } + +func TestFloatHistogramReduceResolution(t *testing.T) { + tcs := map[string]struct { + origin *FloatHistogram + target *FloatHistogram + }{ + "valid float histogram": { + origin: &FloatHistogram{ + Schema: 0, + PositiveSpans: []Span{ + {Offset: 0, Length: 4}, + {Offset: 0, Length: 0}, + {Offset: 3, Length: 2}, + }, + PositiveBuckets: []float64{1, 3, 1, 2, 1, 1}, + NegativeSpans: []Span{ + {Offset: 0, Length: 4}, + {Offset: 0, Length: 0}, + {Offset: 3, Length: 2}, + }, + NegativeBuckets: []float64{1, 3, 1, 2, 1, 1}, + }, + target: &FloatHistogram{ + Schema: -1, + PositiveSpans: []Span{ + {Offset: 0, Length: 3}, + {Offset: 1, Length: 1}, + }, + PositiveBuckets: []float64{1, 4, 2, 2}, + NegativeSpans: []Span{ + {Offset: 0, Length: 3}, + {Offset: 1, Length: 1}, + }, + NegativeBuckets: []float64{1, 4, 2, 2}, + }, + }, + } + + for _, tc := range tcs { + target := tc.origin.ReduceResolution(tc.target.Schema) + require.Equal(t, tc.target, target) + } +} diff --git a/model/histogram/histogram.go b/model/histogram/histogram.go index 4699bd3cbe..3ebb27fbc9 100644 --- a/model/histogram/histogram.go +++ b/model/histogram/histogram.go @@ -503,5 +503,6 @@ func (h *Histogram) ReduceResolution(targetSchema int32) *Histogram { h.NegativeSpans, h.NegativeBuckets = reduceResolution( h.NegativeSpans, h.NegativeBuckets, h.Schema, targetSchema, true, ) + h.Schema = targetSchema return h } diff --git a/model/histogram/histogram_test.go b/model/histogram/histogram_test.go index 6f12f53e82..5aa9ca6feb 100644 --- a/model/histogram/histogram_test.go +++ b/model/histogram/histogram_test.go @@ -967,3 +967,46 @@ func BenchmarkHistogramValidation(b 
*testing.B) { require.NoError(b, h.Validate()) } } + +func TestHistogramReduceResolution(t *testing.T) { + tcs := map[string]struct { + origin *Histogram + target *Histogram + }{ + "valid histogram": { + origin: &Histogram{ + Schema: 0, + PositiveSpans: []Span{ + {Offset: 0, Length: 4}, + {Offset: 0, Length: 0}, + {Offset: 3, Length: 2}, + }, + PositiveBuckets: []int64{1, 2, -2, 1, -1, 0}, + NegativeSpans: []Span{ + {Offset: 0, Length: 4}, + {Offset: 0, Length: 0}, + {Offset: 3, Length: 2}, + }, + NegativeBuckets: []int64{1, 2, -2, 1, -1, 0}, + }, + target: &Histogram{ + Schema: -1, + PositiveSpans: []Span{ + {Offset: 0, Length: 3}, + {Offset: 1, Length: 1}, + }, + PositiveBuckets: []int64{1, 3, -2, 0}, + NegativeSpans: []Span{ + {Offset: 0, Length: 3}, + {Offset: 1, Length: 1}, + }, + NegativeBuckets: []int64{1, 3, -2, 0}, + }, + }, + } + + for _, tc := range tcs { + target := tc.origin.ReduceResolution(tc.target.Schema) + require.Equal(t, tc.target, target) + } +} From 39a35d92bcc476f9c1072e9fb797b168c73826c6 Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Sat, 11 Nov 2023 17:30:16 +0100 Subject: [PATCH 35/66] tsdb/head: wlog exemplars after samples (#13113) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When samples are committed in the head, they are also written to the WAL. The order of WAL records should be sample then exemplar, but this was not the case for native histogram samples. This PR fixes that. The problem with the wrong order is that remote write reads the WAL and sends the recorded timeseries in the WAL order, which means exemplars arrived before histogram samples. If the receiving side is Prometheus TSDB and the series has not existed before then the exemplar does not currently create the series. Which means the exemplar is rejected and lost. 
Signed-off-by: György Krajcsovits --- tsdb/head_append.go | 20 +++++++++------- tsdb/head_test.go | 57 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 3663c800ae..785e99db07 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -689,14 +689,6 @@ func (a *headAppender) log() error { return errors.Wrap(err, "log samples") } } - if len(a.exemplars) > 0 { - rec = enc.Exemplars(exemplarsForEncoding(a.exemplars), buf) - buf = rec[:0] - - if err := a.head.wal.Log(rec); err != nil { - return errors.Wrap(err, "log exemplars") - } - } if len(a.histograms) > 0 { rec = enc.HistogramSamples(a.histograms, buf) buf = rec[:0] @@ -711,6 +703,18 @@ func (a *headAppender) log() error { return errors.Wrap(err, "log float histograms") } } + // Exemplars should be logged after samples (float/native histogram/etc), + // otherwise it might happen that we send the exemplars in a remote write + // batch before the samples, which in turn means the exemplar is rejected + // for missing series, since series are created due to samples. 
+ if len(a.exemplars) > 0 { + rec = enc.Exemplars(exemplarsForEncoding(a.exemplars), buf) + buf = rec[:0] + + if err := a.head.wal.Log(rec); err != nil { + return errors.Wrap(err, "log exemplars") + } + } return nil } diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 1216dd0a69..253f92d61b 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -22,6 +22,7 @@ import ( "os" "path" "path/filepath" + "reflect" "sort" "strconv" "strings" @@ -190,6 +191,10 @@ func readTestWAL(t testing.TB, dir string) (recs []interface{}) { meta, err := dec.Metadata(rec, nil) require.NoError(t, err) recs = append(recs, meta) + case record.Exemplars: + exemplars, err := dec.Exemplars(rec, nil) + require.NoError(t, err) + recs = append(recs, exemplars) default: t.Fatalf("unknown record type") } @@ -5457,3 +5462,55 @@ func TestHeadDetectsDuplicateSampleAtSizeLimit(t *testing.T) { require.Equal(t, numSamples/2, storedSampleCount) } + +func TestWALSampleAndExemplarOrder(t *testing.T) { + lbls := labels.FromStrings("foo", "bar") + testcases := map[string]struct { + appendF func(app storage.Appender, ts int64) (storage.SeriesRef, error) + expectedType reflect.Type + }{ + "float sample": { + appendF: func(app storage.Appender, ts int64) (storage.SeriesRef, error) { + return app.Append(0, lbls, ts, 1.0) + }, + expectedType: reflect.TypeOf([]record.RefSample{}), + }, + "histogram sample": { + appendF: func(app storage.Appender, ts int64) (storage.SeriesRef, error) { + return app.AppendHistogram(0, lbls, ts, tsdbutil.GenerateTestHistogram(1), nil) + }, + expectedType: reflect.TypeOf([]record.RefHistogramSample{}), + }, + "float histogram sample": { + appendF: func(app storage.Appender, ts int64) (storage.SeriesRef, error) { + return app.AppendHistogram(0, lbls, ts, nil, tsdbutil.GenerateTestFloatHistogram(1)) + }, + expectedType: reflect.TypeOf([]record.RefFloatHistogramSample{}), + }, + } + + for testName, tc := range testcases { + t.Run(testName, func(t *testing.T) { + h, w := 
newTestHead(t, 1000, wlog.CompressionNone, false) + defer func() { + require.NoError(t, h.Close()) + }() + + app := h.Appender(context.Background()) + ref, err := tc.appendF(app, 10) + require.NoError(t, err) + app.AppendExemplar(ref, lbls, exemplar.Exemplar{Value: 1.0, Ts: 5}) + + app.Commit() + + recs := readTestWAL(t, w.Dir()) + require.Len(t, recs, 3) + _, ok := recs[0].([]record.RefSeries) + require.True(t, ok, "expected first record to be a RefSeries") + actualType := reflect.TypeOf(recs[1]) + require.Equal(t, tc.expectedType, actualType, "expected second record to be a %s", tc.expectedType) + _, ok = recs[2].([]record.RefExemplar) + require.True(t, ok, "expected third record to be a RefExemplar") + }) + } +} From 4d6d3c171566f38450598685f7584ece3ad65692 Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Sat, 11 Nov 2023 19:01:11 +0100 Subject: [PATCH 36/66] tsdb/encoding: use Go standard errors package Signed-off-by: Matthieu MOREL --- tsdb/encoding/encoding.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tsdb/encoding/encoding.go b/tsdb/encoding/encoding.go index ab97876a36..cd98fbd82f 100644 --- a/tsdb/encoding/encoding.go +++ b/tsdb/encoding/encoding.go @@ -15,13 +15,14 @@ package encoding import ( "encoding/binary" + "errors" + "fmt" "hash" "hash/crc32" "math" "unsafe" "github.com/dennwc/varint" - "github.com/pkg/errors" ) var ( @@ -153,7 +154,7 @@ func NewDecbufUvarintAt(bs ByteSlice, off int, castagnoliTable *crc32.Table) Dec l, n := varint.Uvarint(b) if n <= 0 || n > binary.MaxVarintLen32 { - return Decbuf{E: errors.Errorf("invalid uvarint %d", n)} + return Decbuf{E: fmt.Errorf("invalid uvarint %d", n)} } if bs.Len() < off+n+int(l)+4 { From 118460a64fc3662755bfa63a0f882fdfbda0c86b Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Sat, 11 Nov 2023 19:19:50 +0100 Subject: [PATCH 37/66] tsdb/tombstones: use Go standard errors package Signed-off-by: Matthieu MOREL --- tsdb/tombstones/tombstones.go | 14 +++++++------- 1 file 
changed, 7 insertions(+), 7 deletions(-) diff --git a/tsdb/tombstones/tombstones.go b/tsdb/tombstones/tombstones.go index 94daf51953..f7884f9bf5 100644 --- a/tsdb/tombstones/tombstones.go +++ b/tsdb/tombstones/tombstones.go @@ -15,6 +15,7 @@ package tombstones import ( "encoding/binary" + "errors" "fmt" "hash" "hash/crc32" @@ -26,7 +27,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/pkg/errors" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/encoding" @@ -109,17 +109,17 @@ func WriteFile(logger log.Logger, dir string, tr Reader) (int64, error) { bytes, err := Encode(tr) if err != nil { - return 0, errors.Wrap(err, "encoding tombstones") + return 0, fmt.Errorf("encoding tombstones: %w", err) } // Ignore first byte which is the format type. We do this for compatibility. if _, err := hash.Write(bytes[tombstoneFormatVersionSize:]); err != nil { - return 0, errors.Wrap(err, "calculating hash for tombstones") + return 0, fmt.Errorf("calculating hash for tombstones: %w", err) } n, err = f.Write(bytes) if err != nil { - return 0, errors.Wrap(err, "writing tombstones") + return 0, fmt.Errorf("writing tombstones: %w", err) } size += n @@ -161,7 +161,7 @@ func Encode(tr Reader) ([]byte, error) { func Decode(b []byte) (Reader, error) { d := &encoding.Decbuf{B: b} if flag := d.Byte(); flag != tombstoneFormatV1 { - return nil, errors.Errorf("invalid tombstone format %x", flag) + return nil, fmt.Errorf("invalid tombstone format %x", flag) } if d.Err() != nil { @@ -199,7 +199,7 @@ func ReadTombstones(dir string) (Reader, int64, error) { } if len(b) < tombstonesHeaderSize { - return nil, 0, errors.Wrap(encoding.ErrInvalidSize, "tombstones header") + return nil, 0, fmt.Errorf("tombstones header", encoding.ErrInvalidSize) } d := &encoding.Decbuf{B: b[:len(b)-tombstonesCRCSize]} @@ -211,7 +211,7 @@ func ReadTombstones(dir string) (Reader, int64, error) { hash := newCRC32() // Ignore first byte which is the format 
type. if _, err := hash.Write(d.Get()[tombstoneFormatVersionSize:]); err != nil { - return nil, 0, errors.Wrap(err, "write to hash") + return nil, 0, fmt.Errorf("write to hash: %w", err) } if binary.BigEndian.Uint32(b[len(b)-tombstonesCRCSize:]) != hash.Sum32() { return nil, 0, errors.New("checksum did not match") From c74b7ad4fba0c91b1ff2e38adc1ab548f98a64f0 Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Sat, 11 Nov 2023 19:22:06 +0100 Subject: [PATCH 38/66] Update tombstones.go Signed-off-by: Matthieu MOREL --- tsdb/tombstones/tombstones.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsdb/tombstones/tombstones.go b/tsdb/tombstones/tombstones.go index f7884f9bf5..4cea5005db 100644 --- a/tsdb/tombstones/tombstones.go +++ b/tsdb/tombstones/tombstones.go @@ -199,7 +199,7 @@ func ReadTombstones(dir string) (Reader, int64, error) { } if len(b) < tombstonesHeaderSize { - return nil, 0, fmt.Errorf("tombstones header", encoding.ErrInvalidSize) + return nil, 0, fmt.Errorf("tombstones header: %w", encoding.ErrInvalidSize) } d := &encoding.Decbuf{B: b[:len(b)-tombstonesCRCSize]} From 63691d82a5c892ff52dde4048fa549ff35ecbf5f Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Sat, 11 Nov 2023 20:52:49 +0100 Subject: [PATCH 39/66] tsdb/record: use Go standard errors package Signed-off-by: Matthieu MOREL --- tsdb/record/record.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tsdb/record/record.go b/tsdb/record/record.go index 442e6cd8cb..ad4c324c6c 100644 --- a/tsdb/record/record.go +++ b/tsdb/record/record.go @@ -16,10 +16,10 @@ package record import ( + "errors" + "fmt" "math" - "github.com/pkg/errors" - "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/textparse" @@ -229,7 +229,7 @@ func (d *Decoder) Series(rec []byte, series []RefSeries) ([]RefSeries, error) { return nil, dec.Err() } if len(dec.B) > 0 { - 
return nil, errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + return nil, fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) } return series, nil } @@ -272,7 +272,7 @@ func (d *Decoder) Metadata(rec []byte, metadata []RefMetadata) ([]RefMetadata, e return nil, dec.Err() } if len(dec.B) > 0 { - return nil, errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + return nil, fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) } return metadata, nil } @@ -321,10 +321,10 @@ func (d *Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) } if dec.Err() != nil { - return nil, errors.Wrapf(dec.Err(), "decode error after %d samples", len(samples)) + return nil, fmt.Errorf("decode error after %d samples: %w", len(samples), dec.Err()) } if len(dec.B) > 0 { - return nil, errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + return nil, fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) } return samples, nil } @@ -348,7 +348,7 @@ func (d *Decoder) Tombstones(rec []byte, tstones []tombstones.Stone) ([]tombston return nil, dec.Err() } if len(dec.B) > 0 { - return nil, errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + return nil, fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) } return tstones, nil } @@ -386,10 +386,10 @@ func (d *Decoder) ExemplarsFromBuffer(dec *encoding.Decbuf, exemplars []RefExemp } if dec.Err() != nil { - return nil, errors.Wrapf(dec.Err(), "decode error after %d exemplars", len(exemplars)) + return nil, fmt.Errorf("decode error after %d exemplars: %w", len(exemplars), dec.Err()) } if len(dec.B) > 0 { - return nil, errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + return nil, fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) } return exemplars, nil } @@ -414,10 +414,10 @@ func (d *Decoder) MmapMarkers(rec []byte, markers []RefMmapMarker) ([]RefMmapMar } if dec.Err() != nil { - return nil, errors.Wrapf(dec.Err(), "decode error after 
%d mmap markers", len(markers)) + return nil, fmt.Errorf("decode error after %d mmap markers: %w", len(markers), dec.Err()) } if len(dec.B) > 0 { - return nil, errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + return nil, fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) } return markers, nil } @@ -450,10 +450,10 @@ func (d *Decoder) HistogramSamples(rec []byte, histograms []RefHistogramSample) } if dec.Err() != nil { - return nil, errors.Wrapf(dec.Err(), "decode error after %d histograms", len(histograms)) + return nil, fmt.Errorf("decode error after %d histograms: %w", len(histograms), dec.Err()) } if len(dec.B) > 0 { - return nil, errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + return nil, fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) } return histograms, nil } @@ -532,10 +532,10 @@ func (d *Decoder) FloatHistogramSamples(rec []byte, histograms []RefFloatHistogr } if dec.Err() != nil { - return nil, errors.Wrapf(dec.Err(), "decode error after %d histograms", len(histograms)) + return nil, fmt.Errorf("decode error after %d histograms: %w", len(histograms), dec.Err()) } if len(dec.B) > 0 { - return nil, errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + return nil, fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) } return histograms, nil } From 69c07ec6ae295c5879e672fb7de6f9715b5e73c7 Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Sat, 11 Nov 2023 20:57:42 +0100 Subject: [PATCH 40/66] Update record_test.go Signed-off-by: Matthieu MOREL --- tsdb/record/record_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tsdb/record/record_test.go b/tsdb/record/record_test.go index 5189423142..9111350a73 100644 --- a/tsdb/record/record_test.go +++ b/tsdb/record/record_test.go @@ -15,10 +15,10 @@ package record import ( + "errors" "math/rand" "testing" - "github.com/pkg/errors" "github.com/stretchr/testify/require" 
"github.com/prometheus/prometheus/model/histogram" @@ -209,7 +209,7 @@ func TestRecord_Corrupted(t *testing.T) { corrupted := enc.Samples(samples, nil)[:8] _, err := dec.Samples(corrupted, nil) - require.Equal(t, errors.Cause(err), encoding.ErrInvalidSize) + require.True(t, errors.Is(err, encoding.ErrInvalidSize)) }) t.Run("Test corrupted tombstone record", func(t *testing.T) { @@ -232,7 +232,7 @@ func TestRecord_Corrupted(t *testing.T) { corrupted := enc.Exemplars(exemplars, nil)[:8] _, err := dec.Exemplars(corrupted, nil) - require.Equal(t, errors.Cause(err), encoding.ErrInvalidSize) + require.True(t, errors.Is(err, encoding.ErrInvalidSize)) }) t.Run("Test corrupted metadata record", func(t *testing.T) { @@ -242,7 +242,7 @@ func TestRecord_Corrupted(t *testing.T) { corrupted := enc.Metadata(meta, nil)[:8] _, err := dec.Metadata(corrupted, nil) - require.Equal(t, errors.Cause(err), encoding.ErrInvalidSize) + require.True(t, errors.Is(err, encoding.ErrInvalidSize)) }) t.Run("Test corrupted histogram record", func(t *testing.T) { @@ -267,7 +267,7 @@ func TestRecord_Corrupted(t *testing.T) { corrupted := enc.HistogramSamples(histograms, nil)[:8] _, err := dec.HistogramSamples(corrupted, nil) - require.Equal(t, errors.Cause(err), encoding.ErrInvalidSize) + require.True(t, errors.Is(err, encoding.ErrInvalidSize)) }) } From 469e415d09d185b6a61c90bd4c3eda821e78321b Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Sat, 11 Nov 2023 21:01:24 +0100 Subject: [PATCH 41/66] Update record.go Signed-off-by: Matthieu MOREL --- tsdb/record/record.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsdb/record/record.go b/tsdb/record/record.go index ad4c324c6c..75c15c4900 100644 --- a/tsdb/record/record.go +++ b/tsdb/record/record.go @@ -532,7 +532,7 @@ func (d *Decoder) FloatHistogramSamples(rec []byte, histograms []RefFloatHistogr } if dec.Err() != nil { - return nil, fmt.Errorf("decode error after %d histograms: %w", len(histograms), dec.Err()) + return nil, 
fmt.Errorf("decode error after %d histograms: %w", len(histograms), dec.Err()) } if len(dec.B) > 0 { return nil, fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) From acc114fe553b660cefc71a0311792ef8be4a186a Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Sun, 12 Nov 2023 15:51:37 +0100 Subject: [PATCH 42/66] Fix panic during tsdb Commit (#13092) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix panic during tsdb Commit Fixes the following panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0x19deb45] goroutine 651118930 [running]: github.com/prometheus/prometheus/tsdb.(*headAppender).Commit(0xc19100f7c0) /drone/src/vendor/github.com/prometheus/prometheus/tsdb/head_append.go:855 +0x245 github.com/prometheus/prometheus/tsdb.dbAppender.Commit({{0x35bd6f0?, 0xc19100f7c0?}, 0xc000fa4c00?}) /drone/src/vendor/github.com/prometheus/prometheus/tsdb/db.go:1159 +0x2f We theorize that the panic happened due the the series referenced by the exemplar being removed between AppendExemplar and Commit due to being idle. Signed-off-by: György Krajcsovits --- tsdb/head_append.go | 6 ++++++ tsdb/head_test.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 785e99db07..be53a4f3f6 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -751,6 +751,12 @@ func (a *headAppender) Commit() (err error) { // No errors logging to WAL, so pass the exemplars along to the in memory storage. for _, e := range a.exemplars { s := a.head.series.getByID(chunks.HeadSeriesRef(e.ref)) + if s == nil { + // This is very unlikely to happen, but we have seen it in the wild. + // It means that the series was truncated between AppendExemplar and Commit. + // See TestHeadCompactionWhileAppendAndCommitExemplar. 
+ continue + } // We don't instrument exemplar appends here, all is instrumented by storage. if err := a.head.exemplars.AddExemplar(s.lset, e.exemplar); err != nil { if err == storage.ErrOutOfOrderExemplar { diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 253f92d61b..f2325039a4 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -5514,3 +5514,31 @@ func TestWALSampleAndExemplarOrder(t *testing.T) { }) } } + +// TestHeadCompactionWhileAppendAndCommitExemplar simulates a use case where +// a series is removed from the head while an exemplar is being appended to it. +// This can happen in theory by compacting the head at the right time due to +// a series being idle. +// The test cheats a little bit by not appending a sample with the exemplar. +// If you also add a sample and run Truncate in a concurrent goroutine and run +// the test around a million(!) times, you can get +// `unknown HeadSeriesRef when trying to add exemplar: 1` error on push. +// It is likely that running the test for much longer and with more time variations +// would trigger the +// `signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0xbb03d1` +// panic, that we have seen in the wild once. +func TestHeadCompactionWhileAppendAndCommitExemplar(t *testing.T) { + h, _ := newTestHead(t, DefaultBlockDuration, wlog.CompressionNone, false) + app := h.Appender(context.Background()) + lbls := labels.FromStrings("foo", "bar") + ref, err := app.Append(0, lbls, 1, 1) + require.NoError(t, err) + app.Commit() + // Not adding a sample here to trigger the fault. 
+ app = h.Appender(context.Background()) + _, err = app.AppendExemplar(ref, lbls, exemplar.Exemplar{Value: 1, Ts: 20}) + require.NoError(t, err) + h.Truncate(10) + app.Commit() + h.Close() +} From 08c17df24434d289f13319928ef22f79f926d9b2 Mon Sep 17 00:00:00 2001 From: machine424 Date: Sat, 11 Nov 2023 14:07:09 +0100 Subject: [PATCH 43/66] remote/storage.go: add a test to highlight a race condition between Storage.Notify() and Storage.ApplyConfig() see https://github.com/prometheus/prometheus/issues/12747 Signed-off-by: machine424 --- storage/remote/storage_test.go | 38 ++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/storage/remote/storage_test.go b/storage/remote/storage_test.go index b2848f933d..040a23a5a6 100644 --- a/storage/remote/storage_test.go +++ b/storage/remote/storage_test.go @@ -14,7 +14,9 @@ package remote import ( + "fmt" "net/url" + "sync" "testing" common_config "github.com/prometheus/common/config" @@ -147,3 +149,39 @@ func baseRemoteReadConfig(host string) *config.RemoteReadConfig { } return &cfg } + +// TestWriteStorageApplyConfigsDuringCommit helps detecting races when +// ApplyConfig runs concurrently with Notify +// See https://github.com/prometheus/prometheus/issues/12747 +func TestWriteStorageApplyConfigsDuringCommit(t *testing.T) { + s := NewStorage(nil, nil, nil, t.TempDir(), defaultFlushDeadline, nil) + + var wg sync.WaitGroup + wg.Add(2000) + + start := make(chan struct{}) + for i := 0; i < 1000; i++ { + go func(i int) { + <-start + conf := &config.Config{ + GlobalConfig: config.DefaultGlobalConfig, + RemoteWriteConfigs: []*config.RemoteWriteConfig{ + baseRemoteWriteConfig(fmt.Sprintf("http://test-%d.com", i)), + }, + } + require.NoError(t, s.ApplyConfig(conf)) + wg.Done() + }(i) + } + + for i := 0; i < 1000; i++ { + go func() { + <-start + s.Notify() + wg.Done() + }() + } + + close(start) + wg.Wait() +} From 65a443e6e370c9996506fb3152db678966e7154d Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 
13 Nov 2023 16:20:04 +0000 Subject: [PATCH 44/66] TSDB: initialize conflicts map only when we need it. Suggested by @songjiayang. Signed-off-by: Bryan Boreham --- tsdb/head.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tsdb/head.go b/tsdb/head.go index 410a226d8e..fb90c9fa0a 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -1707,6 +1707,9 @@ func (m seriesHashmap) set(hash uint64, s *memSeries) { m.unique[hash] = s return } + if m.conflicts == nil { + m.conflicts = make(map[uint64][]*memSeries) + } l := m.conflicts[hash] for i, prev := range l { if labels.Equal(prev.lset, s.lset) { @@ -1786,7 +1789,7 @@ func newStripeSeries(stripeSize int, seriesCallback SeriesLifecycleCallback) *st for i := range s.hashes { s.hashes[i] = seriesHashmap{ unique: map[uint64]*memSeries{}, - conflicts: map[uint64][]*memSeries{}, + conflicts: nil, // Initialized on demand in set(). } } return s From 41758972e49091db065e83576859efbfbc32c804 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 13 Nov 2023 18:28:48 +0000 Subject: [PATCH 45/66] web/api: optimize labelnames/values with 1 set of matchers (#12888) * web/api: optimize labelnames/values with 1 set of matchers If there is exactly one set of matchers provided, we can skip adding the results to a map and getting them back out again. 
Signed-off-by: Bryan Boreham --------- Signed-off-by: Bryan Boreham --- web/api/v1/api.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 34abe80aac..671df78872 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -668,7 +668,7 @@ func (api *API) labelNames(r *http.Request) apiFuncResult { names []string warnings annotations.Annotations ) - if len(matcherSets) > 0 { + if len(matcherSets) > 1 { labelNamesSet := make(map[string]struct{}) for _, matchers := range matcherSets { @@ -690,7 +690,11 @@ func (api *API) labelNames(r *http.Request) apiFuncResult { } slices.Sort(names) } else { - names, warnings, err = q.LabelNames(r.Context()) + var matchers []*labels.Matcher + if len(matcherSets) == 1 { + matchers = matcherSets[0] + } + names, warnings, err = q.LabelNames(r.Context(), matchers...) if err != nil { return apiFuncResult{nil, &apiError{errorExec, err}, warnings, nil} } @@ -744,7 +748,7 @@ func (api *API) labelValues(r *http.Request) (result apiFuncResult) { vals []string warnings annotations.Annotations ) - if len(matcherSets) > 0 { + if len(matcherSets) > 1 { var callWarnings annotations.Annotations labelValuesSet := make(map[string]struct{}) for _, matchers := range matcherSets { @@ -763,7 +767,11 @@ func (api *API) labelValues(r *http.Request) (result apiFuncResult) { vals = append(vals, val) } } else { - vals, warnings, err = q.LabelValues(ctx, name) + var matchers []*labels.Matcher + if len(matcherSets) == 1 { + matchers = matcherSets[0] + } + vals, warnings, err = q.LabelValues(ctx, name, matchers...) 
if err != nil { return apiFuncResult{nil, &apiError{errorExec, err}, warnings, closer} } From 413b713aa8f4ed666ea2820d351333d4bb24fa7e Mon Sep 17 00:00:00 2001 From: machine424 Date: Mon, 13 Nov 2023 13:26:02 +0100 Subject: [PATCH 46/66] remote/storage.go: adjust Storage.Notify() to avoid a race condition with Storage.ApplyConfig() Signed-off-by: machine424 --- storage/remote/storage.go | 5 +---- storage/remote/write.go | 10 ++++++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/storage/remote/storage.go b/storage/remote/storage.go index b6533f9275..758ba3cc91 100644 --- a/storage/remote/storage.go +++ b/storage/remote/storage.go @@ -77,10 +77,7 @@ func NewStorage(l log.Logger, reg prometheus.Registerer, stCallback startTimeCal } func (s *Storage) Notify() { - for _, q := range s.rws.queues { - // These should all be non blocking - q.watcher.Notify() - } + s.rws.Notify() } // ApplyConfig updates the state as the new config requires. diff --git a/storage/remote/write.go b/storage/remote/write.go index 4b0a249014..237f8caa91 100644 --- a/storage/remote/write.go +++ b/storage/remote/write.go @@ -121,6 +121,16 @@ func (rws *WriteStorage) run() { } } +func (rws *WriteStorage) Notify() { + rws.mtx.Lock() + defer rws.mtx.Unlock() + + for _, q := range rws.queues { + // These should all be non blocking + q.watcher.Notify() + } +} + // ApplyConfig updates the state as the new config requires. // Only stop & create queues which have changes. func (rws *WriteStorage) ApplyConfig(conf *config.Config) error { From 1bfb3ed062e99bd3c74e05d9ff9a7fa4e30bbe21 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 14 Nov 2023 11:36:35 +0000 Subject: [PATCH 47/66] Labels: reduce allocations when creating from TSDB WAL (#13044) * Labels: reduce allocations when creating from TSDB When reading the WAL, by passing references into the buffer we can avoid copying strings under `-tags stringlabels`. 
Signed-off-by: Bryan Boreham --- model/labels/labels.go | 7 +++++++ model/labels/labels_stringlabels.go | 6 ++++++ tsdb/record/record.go | 7 +++---- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/model/labels/labels.go b/model/labels/labels.go index 3dc3049b1c..231460ea33 100644 --- a/model/labels/labels.go +++ b/model/labels/labels.go @@ -617,6 +617,13 @@ func (b *ScratchBuilder) Add(name, value string) { b.add = append(b.add, Label{Name: name, Value: value}) } +// Add a name/value pair, using []byte instead of string. +// The '-tags stringlabels' version of this function is unsafe, hence the name. +// This version is safe - it copies the strings immediately - but we keep the same name so everything compiles. +func (b *ScratchBuilder) UnsafeAddBytes(name, value []byte) { + b.add = append(b.add, Label{Name: string(name), Value: string(value)}) +} + // Sort the labels added so far by name. func (b *ScratchBuilder) Sort() { slices.SortFunc(b.add, func(a, b Label) int { return strings.Compare(a.Name, b.Name) }) diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index cc6bfcc700..bbb4452d45 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -829,6 +829,12 @@ func (b *ScratchBuilder) Add(name, value string) { b.add = append(b.add, Label{Name: name, Value: value}) } +// Add a name/value pair, using []byte instead of string to reduce memory allocations. +// The values must remain live until Labels() is called. +func (b *ScratchBuilder) UnsafeAddBytes(name, value []byte) { + b.add = append(b.add, Label{Name: yoloString(name), Value: yoloString(value)}) +} + // Sort the labels added so far by name. 
func (b *ScratchBuilder) Sort() { slices.SortFunc(b.add, func(a, b Label) int { return strings.Compare(a.Name, b.Name) }) diff --git a/tsdb/record/record.go b/tsdb/record/record.go index 75c15c4900..42a656dfe8 100644 --- a/tsdb/record/record.go +++ b/tsdb/record/record.go @@ -279,13 +279,12 @@ func (d *Decoder) Metadata(rec []byte, metadata []RefMetadata) ([]RefMetadata, e // DecodeLabels decodes one set of labels from buf. func (d *Decoder) DecodeLabels(dec *encoding.Decbuf) labels.Labels { - // TODO: reconsider if this function could be pushed down into labels.Labels to be more efficient. d.builder.Reset() nLabels := dec.Uvarint() for i := 0; i < nLabels; i++ { - lName := dec.UvarintStr() - lValue := dec.UvarintStr() - d.builder.Add(lName, lValue) + lName := dec.UvarintBytes() + lValue := dec.UvarintBytes() + d.builder.UnsafeAddBytes(lName, lValue) } return d.builder.Labels() } From dd8871379a9af9dccc91031586c43c8c3ea1a511 Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Tue, 14 Nov 2023 13:04:31 +0000 Subject: [PATCH 48/66] remplace errors.Errorf by fmt.Errorf Signed-off-by: Matthieu MOREL --- tsdb/block.go | 3 ++- tsdb/compact.go | 2 +- tsdb/db.go | 6 +++--- tsdb/db_test.go | 2 +- tsdb/head.go | 8 ++++---- tsdb/head_wal.go | 10 +++++----- tsdb/querier_test.go | 6 +++--- tsdb/repair.go | 3 ++- tsdb/wal.go | 14 +++++++------- tsdb/wlog/watcher.go | 4 ++-- 10 files changed, 30 insertions(+), 28 deletions(-) diff --git a/tsdb/block.go b/tsdb/block.go index 13a3899702..b995e4fd59 100644 --- a/tsdb/block.go +++ b/tsdb/block.go @@ -17,6 +17,7 @@ package tsdb import ( "context" "encoding/json" + "fmt" "io" "os" "path/filepath" @@ -238,7 +239,7 @@ func readMetaFile(dir string) (*BlockMeta, int64, error) { return nil, 0, err } if m.Version != metaVersion1 { - return nil, 0, errors.Errorf("unexpected meta file version %d", m.Version) + return nil, 0, fmt.Errorf("unexpected meta file version %d", m.Version) } return &m, int64(len(b)), nil diff --git a/tsdb/compact.go 
b/tsdb/compact.go index f509380f8c..32c88d2cc0 100644 --- a/tsdb/compact.go +++ b/tsdb/compact.go @@ -151,7 +151,7 @@ func NewLeveledCompactor(ctx context.Context, r prometheus.Registerer, l log.Log func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, maxBlockChunkSegmentSize int64, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { if len(ranges) == 0 { - return nil, errors.Errorf("at least one range must be provided") + return nil, fmt.Errorf("at least one range must be provided") } if pool == nil { pool = chunkenc.NewPool() diff --git a/tsdb/db.go b/tsdb/db.go index 8b3d4d3004..c4c05e3901 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -662,7 +662,7 @@ func (db *DBReadOnly) Block(blockID string) (BlockReader, error) { _, err := os.Stat(filepath.Join(db.dir, blockID)) if os.IsNotExist(err) { - return nil, errors.Errorf("invalid block ID %s", blockID) + return nil, fmt.Errorf("invalid block ID %s", blockID) } block, err := OpenBlock(db.logger, filepath.Join(db.dir, blockID), nil) @@ -1834,10 +1834,10 @@ func (db *DB) ForceHeadMMap() { // will create a new block containing all data that's currently in the memory buffer/WAL. 
func (db *DB) Snapshot(dir string, withHead bool) error { if dir == db.dir { - return errors.Errorf("cannot snapshot into base directory") + return fmt.Errorf("cannot snapshot into base directory") } if _, err := ulid.ParseStrict(dir); err == nil { - return errors.Errorf("dir must not be a valid ULID") + return fmt.Errorf("dir must not be a valid ULID") } db.cmtx.Lock() diff --git a/tsdb/db_test.go b/tsdb/db_test.go index f021faba92..c7ea068d60 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -3082,7 +3082,7 @@ func deleteNonBlocks(dbDir string) error { } for _, dir := range dirs { if ok := isBlockDir(dir); !ok { - return errors.Errorf("root folder:%v still hase non block directory:%v", dbDir, dir.Name()) + return fmt.Errorf("root folder:%v still hase non block directory:%v", dbDir, dir.Name()) } } return nil diff --git a/tsdb/head.go b/tsdb/head.go index d096bc6312..f7e697e54a 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -224,11 +224,11 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal, wbl *wlog.WL, opts *Hea // even if ooo is not enabled yet. capMax := opts.OutOfOrderCapMax.Load() if capMax <= 0 || capMax > 255 { - return nil, errors.Errorf("OOOCapMax of %d is invalid. must be > 0 and <= 255", capMax) + return nil, fmt.Errorf("OOOCapMax of %d is invalid. 
must be > 0 and <= 255", capMax) } if opts.ChunkRange < 1 { - return nil, errors.Errorf("invalid chunk range %d", opts.ChunkRange) + return nil, fmt.Errorf("invalid chunk range %d", opts.ChunkRange) } if opts.SeriesCallback == nil { opts.SeriesCallback = &noopSeriesLifecycleCallback{} @@ -857,7 +857,7 @@ func (h *Head) loadMmappedChunks(refSeries map[chunks.HeadSeriesRef]*memSeries) slice := mmappedChunks[seriesRef] if len(slice) > 0 && slice[len(slice)-1].maxTime >= mint { h.metrics.mmapChunkCorruptionTotal.Inc() - return errors.Errorf("out of sequence m-mapped chunk for series ref %d, last chunk: [%d, %d], new: [%d, %d]", + return fmt.Errorf("out of sequence m-mapped chunk for series ref %d, last chunk: [%d, %d], new: [%d, %d]", seriesRef, slice[len(slice)-1].minTime, slice[len(slice)-1].maxTime, mint, maxt) } slice = append(slice, &mmappedChunk{ @@ -872,7 +872,7 @@ func (h *Head) loadMmappedChunks(refSeries map[chunks.HeadSeriesRef]*memSeries) if len(ms.mmappedChunks) > 0 && ms.mmappedChunks[len(ms.mmappedChunks)-1].maxTime >= mint { h.metrics.mmapChunkCorruptionTotal.Inc() - return errors.Errorf("out of sequence m-mapped chunk for series ref %d, last chunk: [%d, %d], new: [%d, %d]", + return fmt.Errorf("out of sequence m-mapped chunk for series ref %d, last chunk: [%d, %d], new: [%d, %d]", seriesRef, ms.mmappedChunks[len(ms.mmappedChunks)-1].minTime, ms.mmappedChunks[len(ms.mmappedChunks)-1].maxTime, mint, maxt) } diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index 34948f917a..07fa8280ca 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -970,7 +970,7 @@ func decodeSeriesFromChunkSnapshot(d *record.Decoder, b []byte) (csr chunkSnapsh dec := encoding.Decbuf{B: b} if flag := dec.Byte(); flag != chunkSnapshotRecordTypeSeries { - return csr, errors.Errorf("invalid record type %x", flag) + return csr, fmt.Errorf("invalid record type %x", flag) } csr.ref = chunks.HeadSeriesRef(dec.Be64()) @@ -1018,7 +1018,7 @@ func decodeSeriesFromChunkSnapshot(d 
*record.Decoder, b []byte) (csr chunkSnapsh err = dec.Err() if err != nil && len(dec.B) > 0 { - err = errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + err = fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) } return @@ -1041,7 +1041,7 @@ func decodeTombstonesSnapshotRecord(b []byte) (tombstones.Reader, error) { dec := encoding.Decbuf{B: b} if flag := dec.Byte(); flag != chunkSnapshotRecordTypeTombstones { - return nil, errors.Errorf("invalid record type %x", flag) + return nil, fmt.Errorf("invalid record type %x", flag) } tr, err := tombstones.Decode(dec.UvarintBytes()) @@ -1254,7 +1254,7 @@ func LastChunkSnapshot(dir string) (string, int, int, error) { continue } if !fi.IsDir() { - return "", 0, 0, errors.Errorf("chunk snapshot %s is not a directory", fi.Name()) + return "", 0, 0, fmt.Errorf("chunk snapshot %s is not a directory", fi.Name()) } splits := strings.Split(fi.Name()[len(chunkSnapshotPrefix):], ".") @@ -1492,7 +1492,7 @@ Outer: default: // This is a record type we don't understand. It is either and old format from earlier versions, // or a new format and the code was rolled back to old version. 
- loopErr = errors.Errorf("unsupported snapshot record type 0b%b", rec[0]) + loopErr = fmt.Errorf("unsupported snapshot record type 0b%b", rec[0]) break Outer } } diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 09f76034b0..3c27ab2f3c 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -710,7 +710,7 @@ func createFakeReaderAndNotPopulatedChunks(s ...[]chunks.Sample) (*fakeChunksRea func (r *fakeChunksReader) Chunk(meta chunks.Meta) (chunkenc.Chunk, error) { chk, ok := r.chks[meta.Ref] if !ok { - return nil, errors.Errorf("chunk not found at ref %v", meta.Ref) + return nil, fmt.Errorf("chunk not found at ref %v", meta.Ref) } return chk, nil } @@ -1831,7 +1831,7 @@ func (m mockIndex) Symbols() index.StringIter { func (m *mockIndex) AddSeries(ref storage.SeriesRef, l labels.Labels, chunks ...chunks.Meta) error { if _, ok := m.series[ref]; ok { - return errors.Errorf("series with reference %d already added", ref) + return fmt.Errorf("series with reference %d already added", ref) } l.Range(func(lbl labels.Label) { m.symbols[lbl.Name] = struct{}{} @@ -1852,7 +1852,7 @@ func (m *mockIndex) AddSeries(ref storage.SeriesRef, l labels.Labels, chunks ... 
func (m mockIndex) WritePostings(name, value string, it index.Postings) error { l := labels.Label{Name: name, Value: value} if _, ok := m.postings[l]; ok { - return errors.Errorf("postings for %s already added", l) + return fmt.Errorf("postings for %s already added", l) } ep, err := index.ExpandPostings(it) if err != nil { diff --git a/tsdb/repair.go b/tsdb/repair.go index 0c2e08791c..0811164541 100644 --- a/tsdb/repair.go +++ b/tsdb/repair.go @@ -15,6 +15,7 @@ package tsdb import ( "encoding/json" + "fmt" "io" "os" "path/filepath" @@ -124,7 +125,7 @@ func readBogusMetaFile(dir string) (*BlockMeta, error) { return nil, err } if m.Version != metaVersion1 && m.Version != 2 { - return nil, errors.Errorf("unexpected meta file version %d", m.Version) + return nil, fmt.Errorf("unexpected meta file version %d", m.Version) } return &m, nil } diff --git a/tsdb/wal.go b/tsdb/wal.go index af83127bba..bc7db35bf1 100644 --- a/tsdb/wal.go +++ b/tsdb/wal.go @@ -525,14 +525,14 @@ func (w *SegmentWAL) openSegmentFile(name string) (*os.File, error) { case err != nil: return nil, errors.Wrapf(err, "validate meta %q", f.Name()) case n != 8: - return nil, errors.Errorf("invalid header size %d in %q", n, f.Name()) + return nil, fmt.Errorf("invalid header size %d in %q", n, f.Name()) } if m := binary.BigEndian.Uint32(metab[:4]); m != WALMagic { - return nil, errors.Errorf("invalid magic header %x in %q", m, f.Name()) + return nil, fmt.Errorf("invalid magic header %x in %q", m, f.Name()) } if metab[4] != WALFormatDefault { - return nil, errors.Errorf("unknown WAL segment format %d in %q", metab[4], f.Name()) + return nil, fmt.Errorf("unknown WAL segment format %d in %q", metab[4], f.Name()) } hasError = false return f, nil @@ -1052,7 +1052,7 @@ func (e walCorruptionErr) Error() string { func (r *walReader) corruptionErr(s string, args ...interface{}) error { return walCorruptionErr{ - err: errors.Errorf(s, args...), + err: fmt.Errorf(s, args...), file: r.cur, lastOffset: r.lastOffset, } 
@@ -1124,7 +1124,7 @@ func (r *walReader) decodeSeries(flag byte, b []byte, res *[]record.RefSeries) e return dec.Err() } if len(dec.B) > 0 { - return errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + return fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) } return nil } @@ -1156,7 +1156,7 @@ func (r *walReader) decodeSamples(flag byte, b []byte, res *[]record.RefSample) return errors.Wrapf(dec.Err(), "decode error after %d samples", len(*res)) } if len(dec.B) > 0 { - return errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + return fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) } return nil } @@ -1176,7 +1176,7 @@ func (r *walReader) decodeDeletes(flag byte, b []byte, res *[]tombstones.Stone) return dec.Err() } if len(dec.B) > 0 { - return errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + return fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) } return nil } diff --git a/tsdb/wlog/watcher.go b/tsdb/wlog/watcher.go index 221e9607ca..5689602e74 100644 --- a/tsdb/wlog/watcher.go +++ b/tsdb/wlog/watcher.go @@ -747,12 +747,12 @@ func checkpointNum(dir string) (int, error) { // dir may contain a hidden directory, so only check the base directory chunks := strings.Split(filepath.Base(dir), ".") if len(chunks) != 2 { - return 0, errors.Errorf("invalid checkpoint dir string: %s", dir) + return 0, fmt.Errorf("invalid checkpoint dir string: %s", dir) } result, err := strconv.Atoi(chunks[1]) if err != nil { - return 0, errors.Errorf("invalid checkpoint dir string: %s", dir) + return 0, fmt.Errorf("invalid checkpoint dir string: %s", dir) } return result, nil From c92fbf3fdf40aa2f687c6dcfaccfc6970d382c94 Mon Sep 17 00:00:00 2001 From: Julien Pivotto Date: Tue, 7 Nov 2023 16:37:37 -0600 Subject: [PATCH 49/66] Add feature flag for PromQL experimental functions. This PR adds an Experimental flag to the functions. 
This can be used by https://github.com/prometheus/prometheus/pull/13059 but also xrate and other future functions. Signed-off-by: Julien Pivotto --- cmd/prometheus/main.go | 6 +- docs/command-line/prometheus.md | 2 +- docs/feature_flags.md | 9 +- promql/parser/functions.go | 12 +- promql/parser/generated_parser.y | 5 +- promql/parser/generated_parser.y.go | 187 ++++++++++++++-------------- 6 files changed, 121 insertions(+), 100 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 81699835a8..4112cd842b 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -63,6 +63,7 @@ import ( "github.com/prometheus/prometheus/notifier" _ "github.com/prometheus/prometheus/plugins" // Register plugins. "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/storage" @@ -199,6 +200,9 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error { case "no-default-scrape-port": c.scrape.NoDefaultPort = true level.Info(logger).Log("msg", "No default port will be appended to scrape targets' addresses.") + case "promql-experimental-functions": + parser.EnableExperimentalFunctions = true + level.Info(logger).Log("msg", "Experimental PromQL functions enabled.") case "native-histograms": c.tsdb.EnableNativeHistograms = true // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. @@ -419,7 +423,7 @@ func main() { a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates."). Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval) - a.Flag("enable-feature", "Comma separated feature names to enable. 
Valid options: agent, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-at-modifier, promql-negative-offset, promql-per-step-stats, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). + a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-at-modifier, promql-negative-offset, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). Default("").StringsVar(&cfg.featureList) promlogflag.AddFlags(a, &cfg.promlogConfig) diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index 78ec205f24..cd6dac555d 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -52,7 +52,7 @@ The Prometheus monitoring server | --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` | | --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` | | --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` | -| --enable-feature | Comma separated feature names to enable. 
Valid options: agent, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-at-modifier, promql-negative-offset, promql-per-step-stats, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +| --enable-feature | Comma separated feature names to enable. Valid options: agent, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-at-modifier, promql-negative-offset, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | | --log.level | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` | | --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` | diff --git a/docs/feature_flags.md b/docs/feature_flags.md index f580c959fe..d57763af0b 100644 --- a/docs/feature_flags.md +++ b/docs/feature_flags.md @@ -187,4 +187,11 @@ This should **only** be applied to metrics that currently produce such labels. The OTLP receiver allows Prometheus to accept [OpenTelemetry](https://opentelemetry.io/) metrics writes. Prometheus is best used as a Pull based system, and staleness, `up` metric, and other Pull enabled features -won't work when you push OTLP metrics. \ No newline at end of file +won't work when you push OTLP metrics. + +## Experimental PromQL functions + +`--enable-feature=promql-experimental-functions` + +Enables PromQL functions that are considered experimental and whose name or +semantics could change. 
diff --git a/promql/parser/functions.go b/promql/parser/functions.go index 45a30219e6..8d9d92aa14 100644 --- a/promql/parser/functions.go +++ b/promql/parser/functions.go @@ -16,12 +16,16 @@ package parser // Function represents a function of the expression language and is // used by function nodes. type Function struct { - Name string - ArgTypes []ValueType - Variadic int - ReturnType ValueType + Name string + ArgTypes []ValueType + Variadic int + ReturnType ValueType + Experimental bool } +// EnableExperimentalFunctions controls whether experimentalFunctions are enabled. +var EnableExperimentalFunctions bool + // Functions is a list of all functions supported by PromQL, including their types. var Functions = map[string]*Function{ "abs": { diff --git a/promql/parser/generated_parser.y b/promql/parser/generated_parser.y index 676fd9fb5b..dce79f7693 100644 --- a/promql/parser/generated_parser.y +++ b/promql/parser/generated_parser.y @@ -22,7 +22,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/model/histogram" - "github.com/prometheus/prometheus/promql/parser/posrange" + "github.com/prometheus/prometheus/promql/parser/posrange" ) %} @@ -369,6 +369,9 @@ function_call : IDENTIFIER function_call_body if !exist{ yylex.(*parser).addParseErrf($1.PositionRange(),"unknown function with name %q", $1.Val) } + if fn != nil && fn.Experimental && !EnableExperimentalFunctions { + yylex.(*parser).addParseErrf($1.PositionRange(),"function %q is not enabled", $1.Val) + } $$ = &Call{ Func: fn, Args: $2.(Expressions), diff --git a/promql/parser/generated_parser.y.go b/promql/parser/generated_parser.y.go index 77a403be35..4057d9163b 100644 --- a/promql/parser/generated_parser.y.go +++ b/promql/parser/generated_parser.y.go @@ -230,7 +230,7 @@ const yyEofCode = 1 const yyErrCode = 2 const yyInitialStackSize = 16 -//line promql/parser/generated_parser.y:916 +//line 
promql/parser/generated_parser.y:919 //line yacctab:1 var yyExca = [...]int16{ @@ -1277,6 +1277,9 @@ yydefault: if !exist { yylex.(*parser).addParseErrf(yyDollar[1].item.PositionRange(), "unknown function with name %q", yyDollar[1].item.Val) } + if fn != nil && fn.Experimental && !EnableExperimentalFunctions { + yylex.(*parser).addParseErrf(yyDollar[1].item.PositionRange(), "function %q is not enabled", yyDollar[1].item.Val) + } yyVAL.node = &Call{ Func: fn, Args: yyDollar[2].node.(Expressions), @@ -1288,86 +1291,86 @@ yydefault: } case 61: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:384 +//line promql/parser/generated_parser.y:387 { yyVAL.node = yyDollar[2].node } case 62: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:386 +//line promql/parser/generated_parser.y:389 { yyVAL.node = Expressions{} } case 63: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:390 +//line promql/parser/generated_parser.y:393 { yyVAL.node = append(yyDollar[1].node.(Expressions), yyDollar[3].node.(Expr)) } case 64: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:392 +//line promql/parser/generated_parser.y:395 { yyVAL.node = Expressions{yyDollar[1].node.(Expr)} } case 65: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:394 +//line promql/parser/generated_parser.y:397 { yylex.(*parser).addParseErrf(yyDollar[2].item.PositionRange(), "trailing commas not allowed in function call args") yyVAL.node = yyDollar[1].node } case 66: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:405 +//line promql/parser/generated_parser.y:408 { yyVAL.node = &ParenExpr{Expr: yyDollar[2].node.(Expr), PosRange: mergeRanges(&yyDollar[1].item, &yyDollar[3].item)} } case 67: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:413 +//line promql/parser/generated_parser.y:416 { yylex.(*parser).addOffset(yyDollar[1].node, yyDollar[3].duration) 
yyVAL.node = yyDollar[1].node } case 68: yyDollar = yyS[yypt-4 : yypt+1] -//line promql/parser/generated_parser.y:418 +//line promql/parser/generated_parser.y:421 { yylex.(*parser).addOffset(yyDollar[1].node, -yyDollar[4].duration) yyVAL.node = yyDollar[1].node } case 69: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:423 +//line promql/parser/generated_parser.y:426 { yylex.(*parser).unexpected("offset", "duration") yyVAL.node = yyDollar[1].node } case 70: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:430 +//line promql/parser/generated_parser.y:433 { yylex.(*parser).setTimestamp(yyDollar[1].node, yyDollar[3].float) yyVAL.node = yyDollar[1].node } case 71: yyDollar = yyS[yypt-5 : yypt+1] -//line promql/parser/generated_parser.y:435 +//line promql/parser/generated_parser.y:438 { yylex.(*parser).setAtModifierPreprocessor(yyDollar[1].node, yyDollar[3].item) yyVAL.node = yyDollar[1].node } case 72: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:440 +//line promql/parser/generated_parser.y:443 { yylex.(*parser).unexpected("@", "timestamp") yyVAL.node = yyDollar[1].node } case 75: yyDollar = yyS[yypt-4 : yypt+1] -//line promql/parser/generated_parser.y:450 +//line promql/parser/generated_parser.y:453 { var errMsg string vs, ok := yyDollar[1].node.(*VectorSelector) @@ -1392,7 +1395,7 @@ yydefault: } case 76: yyDollar = yyS[yypt-6 : yypt+1] -//line promql/parser/generated_parser.y:475 +//line promql/parser/generated_parser.y:478 { yyVAL.node = &SubqueryExpr{ Expr: yyDollar[1].node.(Expr), @@ -1404,35 +1407,35 @@ yydefault: } case 77: yyDollar = yyS[yypt-6 : yypt+1] -//line promql/parser/generated_parser.y:485 +//line promql/parser/generated_parser.y:488 { yylex.(*parser).unexpected("subquery selector", "\"]\"") yyVAL.node = yyDollar[1].node } case 78: yyDollar = yyS[yypt-5 : yypt+1] -//line promql/parser/generated_parser.y:487 +//line promql/parser/generated_parser.y:490 { 
yylex.(*parser).unexpected("subquery selector", "duration or \"]\"") yyVAL.node = yyDollar[1].node } case 79: yyDollar = yyS[yypt-4 : yypt+1] -//line promql/parser/generated_parser.y:489 +//line promql/parser/generated_parser.y:492 { yylex.(*parser).unexpected("subquery or range", "\":\" or \"]\"") yyVAL.node = yyDollar[1].node } case 80: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:491 +//line promql/parser/generated_parser.y:494 { yylex.(*parser).unexpected("subquery selector", "duration") yyVAL.node = yyDollar[1].node } case 81: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:501 +//line promql/parser/generated_parser.y:504 { if nl, ok := yyDollar[2].node.(*NumberLiteral); ok { if yyDollar[1].item.Typ == SUB { @@ -1446,7 +1449,7 @@ yydefault: } case 82: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:519 +//line promql/parser/generated_parser.y:522 { vs := yyDollar[2].node.(*VectorSelector) vs.PosRange = mergeRanges(&yyDollar[1].item, vs) @@ -1456,7 +1459,7 @@ yydefault: } case 83: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:527 +//line promql/parser/generated_parser.y:530 { vs := &VectorSelector{ Name: yyDollar[1].item.Val, @@ -1468,7 +1471,7 @@ yydefault: } case 84: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:537 +//line promql/parser/generated_parser.y:540 { vs := yyDollar[1].node.(*VectorSelector) yylex.(*parser).assembleVectorSelector(vs) @@ -1476,7 +1479,7 @@ yydefault: } case 85: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:545 +//line promql/parser/generated_parser.y:548 { yyVAL.node = &VectorSelector{ LabelMatchers: yyDollar[2].matchers, @@ -1485,7 +1488,7 @@ yydefault: } case 86: yyDollar = yyS[yypt-4 : yypt+1] -//line promql/parser/generated_parser.y:552 +//line promql/parser/generated_parser.y:555 { yyVAL.node = &VectorSelector{ LabelMatchers: yyDollar[2].matchers, @@ -1494,7 +1497,7 @@ 
yydefault: } case 87: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:559 +//line promql/parser/generated_parser.y:562 { yyVAL.node = &VectorSelector{ LabelMatchers: []*labels.Matcher{}, @@ -1503,7 +1506,7 @@ yydefault: } case 88: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:568 +//line promql/parser/generated_parser.y:571 { if yyDollar[1].matchers != nil { yyVAL.matchers = append(yyDollar[1].matchers, yyDollar[3].matcher) @@ -1513,47 +1516,47 @@ yydefault: } case 89: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:576 +//line promql/parser/generated_parser.y:579 { yyVAL.matchers = []*labels.Matcher{yyDollar[1].matcher} } case 90: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:578 +//line promql/parser/generated_parser.y:581 { yylex.(*parser).unexpected("label matching", "\",\" or \"}\"") yyVAL.matchers = yyDollar[1].matchers } case 91: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:582 +//line promql/parser/generated_parser.y:585 { yyVAL.matcher = yylex.(*parser).newLabelMatcher(yyDollar[1].item, yyDollar[2].item, yyDollar[3].item) } case 92: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:584 +//line promql/parser/generated_parser.y:587 { yylex.(*parser).unexpected("label matching", "string") yyVAL.matcher = nil } case 93: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:586 +//line promql/parser/generated_parser.y:589 { yylex.(*parser).unexpected("label matching", "label matching operator") yyVAL.matcher = nil } case 94: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:588 +//line promql/parser/generated_parser.y:591 { yylex.(*parser).unexpected("label matching", "identifier or \"}\"") yyVAL.matcher = nil } case 95: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:596 +//line promql/parser/generated_parser.y:599 { b := 
labels.NewBuilder(yyDollar[2].labels) b.Set(labels.MetricName, yyDollar[1].item.Val) @@ -1561,83 +1564,83 @@ yydefault: } case 96: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:598 +//line promql/parser/generated_parser.y:601 { yyVAL.labels = yyDollar[1].labels } case 119: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:605 +//line promql/parser/generated_parser.y:608 { yyVAL.labels = labels.New(yyDollar[2].lblList...) } case 120: yyDollar = yyS[yypt-4 : yypt+1] -//line promql/parser/generated_parser.y:607 +//line promql/parser/generated_parser.y:610 { yyVAL.labels = labels.New(yyDollar[2].lblList...) } case 121: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:609 +//line promql/parser/generated_parser.y:612 { yyVAL.labels = labels.New() } case 122: yyDollar = yyS[yypt-0 : yypt+1] -//line promql/parser/generated_parser.y:611 +//line promql/parser/generated_parser.y:614 { yyVAL.labels = labels.New() } case 123: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:615 +//line promql/parser/generated_parser.y:618 { yyVAL.lblList = append(yyDollar[1].lblList, yyDollar[3].label) } case 124: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:617 +//line promql/parser/generated_parser.y:620 { yyVAL.lblList = []labels.Label{yyDollar[1].label} } case 125: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:619 +//line promql/parser/generated_parser.y:622 { yylex.(*parser).unexpected("label set", "\",\" or \"}\"") yyVAL.lblList = yyDollar[1].lblList } case 126: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:624 +//line promql/parser/generated_parser.y:627 { yyVAL.label = labels.Label{Name: yyDollar[1].item.Val, Value: yylex.(*parser).unquoteString(yyDollar[3].item.Val)} } case 127: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:626 +//line promql/parser/generated_parser.y:629 { 
yylex.(*parser).unexpected("label set", "string") yyVAL.label = labels.Label{} } case 128: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:628 +//line promql/parser/generated_parser.y:631 { yylex.(*parser).unexpected("label set", "\"=\"") yyVAL.label = labels.Label{} } case 129: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:630 +//line promql/parser/generated_parser.y:633 { yylex.(*parser).unexpected("label set", "identifier or \"}\"") yyVAL.label = labels.Label{} } case 130: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:641 +//line promql/parser/generated_parser.y:644 { yylex.(*parser).generatedParserResult = &seriesDescription{ labels: yyDollar[1].labels, @@ -1646,38 +1649,38 @@ yydefault: } case 131: yyDollar = yyS[yypt-0 : yypt+1] -//line promql/parser/generated_parser.y:650 +//line promql/parser/generated_parser.y:653 { yyVAL.series = []SequenceValue{} } case 132: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:652 +//line promql/parser/generated_parser.y:655 { yyVAL.series = append(yyDollar[1].series, yyDollar[3].series...) 
} case 133: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:654 +//line promql/parser/generated_parser.y:657 { yyVAL.series = yyDollar[1].series } case 134: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:656 +//line promql/parser/generated_parser.y:659 { yylex.(*parser).unexpected("series values", "") yyVAL.series = nil } case 135: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:660 +//line promql/parser/generated_parser.y:663 { yyVAL.series = []SequenceValue{{Omitted: true}} } case 136: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:662 +//line promql/parser/generated_parser.y:665 { yyVAL.series = []SequenceValue{} for i := uint64(0); i < yyDollar[3].uint; i++ { @@ -1686,13 +1689,13 @@ yydefault: } case 137: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:669 +//line promql/parser/generated_parser.y:672 { yyVAL.series = []SequenceValue{{Value: yyDollar[1].float}} } case 138: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:671 +//line promql/parser/generated_parser.y:674 { yyVAL.series = []SequenceValue{} // Add an additional value for time 0, which we ignore in tests. @@ -1702,7 +1705,7 @@ yydefault: } case 139: yyDollar = yyS[yypt-4 : yypt+1] -//line promql/parser/generated_parser.y:679 +//line promql/parser/generated_parser.y:682 { yyVAL.series = []SequenceValue{} // Add an additional value for time 0, which we ignore in tests. @@ -1713,13 +1716,13 @@ yydefault: } case 140: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:689 +//line promql/parser/generated_parser.y:692 { yyVAL.series = []SequenceValue{{Histogram: yyDollar[1].histogram}} } case 141: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:693 +//line promql/parser/generated_parser.y:696 { yyVAL.series = []SequenceValue{} // Add an additional value for time 0, which we ignore in tests. 
@@ -1730,7 +1733,7 @@ yydefault: } case 142: yyDollar = yyS[yypt-5 : yypt+1] -//line promql/parser/generated_parser.y:702 +//line promql/parser/generated_parser.y:705 { val, err := yylex.(*parser).histogramsIncreaseSeries(yyDollar[1].histogram, yyDollar[3].histogram, yyDollar[5].uint) if err != nil { @@ -1740,7 +1743,7 @@ yydefault: } case 143: yyDollar = yyS[yypt-5 : yypt+1] -//line promql/parser/generated_parser.y:710 +//line promql/parser/generated_parser.y:713 { val, err := yylex.(*parser).histogramsDecreaseSeries(yyDollar[1].histogram, yyDollar[3].histogram, yyDollar[5].uint) if err != nil { @@ -1750,7 +1753,7 @@ yydefault: } case 144: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:720 +//line promql/parser/generated_parser.y:723 { if yyDollar[1].item.Val != "stale" { yylex.(*parser).unexpected("series values", "number or \"stale\"") @@ -1759,138 +1762,138 @@ yydefault: } case 147: yyDollar = yyS[yypt-4 : yypt+1] -//line promql/parser/generated_parser.y:732 +//line promql/parser/generated_parser.y:735 { yyVAL.histogram = yylex.(*parser).buildHistogramFromMap(&yyDollar[2].descriptors) } case 148: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:736 +//line promql/parser/generated_parser.y:739 { yyVAL.histogram = yylex.(*parser).buildHistogramFromMap(&yyDollar[2].descriptors) } case 149: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:740 +//line promql/parser/generated_parser.y:743 { m := yylex.(*parser).newMap() yyVAL.histogram = yylex.(*parser).buildHistogramFromMap(&m) } case 150: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:745 +//line promql/parser/generated_parser.y:748 { m := yylex.(*parser).newMap() yyVAL.histogram = yylex.(*parser).buildHistogramFromMap(&m) } case 151: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:753 +//line promql/parser/generated_parser.y:756 { yyVAL.descriptors = 
*(yylex.(*parser).mergeMaps(&yyDollar[1].descriptors, &yyDollar[3].descriptors)) } case 152: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:757 +//line promql/parser/generated_parser.y:760 { yyVAL.descriptors = yyDollar[1].descriptors } case 153: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:760 +//line promql/parser/generated_parser.y:763 { yylex.(*parser).unexpected("histogram description", "histogram description key, e.g. buckets:[5 10 7]") } case 154: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:767 +//line promql/parser/generated_parser.y:770 { yyVAL.descriptors = yylex.(*parser).newMap() yyVAL.descriptors["schema"] = yyDollar[3].int } case 155: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:772 +//line promql/parser/generated_parser.y:775 { yyVAL.descriptors = yylex.(*parser).newMap() yyVAL.descriptors["sum"] = yyDollar[3].float } case 156: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:777 +//line promql/parser/generated_parser.y:780 { yyVAL.descriptors = yylex.(*parser).newMap() yyVAL.descriptors["count"] = yyDollar[3].float } case 157: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:782 +//line promql/parser/generated_parser.y:785 { yyVAL.descriptors = yylex.(*parser).newMap() yyVAL.descriptors["z_bucket"] = yyDollar[3].float } case 158: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:787 +//line promql/parser/generated_parser.y:790 { yyVAL.descriptors = yylex.(*parser).newMap() yyVAL.descriptors["z_bucket_w"] = yyDollar[3].float } case 159: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:792 +//line promql/parser/generated_parser.y:795 { yyVAL.descriptors = yylex.(*parser).newMap() yyVAL.descriptors["buckets"] = yyDollar[3].bucket_set } case 160: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:797 +//line 
promql/parser/generated_parser.y:800 { yyVAL.descriptors = yylex.(*parser).newMap() yyVAL.descriptors["offset"] = yyDollar[3].int } case 161: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:802 +//line promql/parser/generated_parser.y:805 { yyVAL.descriptors = yylex.(*parser).newMap() yyVAL.descriptors["n_buckets"] = yyDollar[3].bucket_set } case 162: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:807 +//line promql/parser/generated_parser.y:810 { yyVAL.descriptors = yylex.(*parser).newMap() yyVAL.descriptors["n_offset"] = yyDollar[3].int } case 163: yyDollar = yyS[yypt-4 : yypt+1] -//line promql/parser/generated_parser.y:814 +//line promql/parser/generated_parser.y:817 { yyVAL.bucket_set = yyDollar[2].bucket_set } case 164: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:818 +//line promql/parser/generated_parser.y:821 { yyVAL.bucket_set = yyDollar[2].bucket_set } case 165: yyDollar = yyS[yypt-3 : yypt+1] -//line promql/parser/generated_parser.y:824 +//line promql/parser/generated_parser.y:827 { yyVAL.bucket_set = append(yyDollar[1].bucket_set, yyDollar[3].float) } case 166: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:828 +//line promql/parser/generated_parser.y:831 { yyVAL.bucket_set = []float64{yyDollar[1].float} } case 213: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:853 +//line promql/parser/generated_parser.y:856 { yyVAL.node = &NumberLiteral{ Val: yylex.(*parser).number(yyDollar[1].item.Val), @@ -1899,25 +1902,25 @@ yydefault: } case 214: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:861 +//line promql/parser/generated_parser.y:864 { yyVAL.float = yylex.(*parser).number(yyDollar[1].item.Val) } case 215: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:863 +//line promql/parser/generated_parser.y:866 { yyVAL.float = yyDollar[2].float } case 216: yyDollar = yyS[yypt-2 : yypt+1] 
-//line promql/parser/generated_parser.y:864 +//line promql/parser/generated_parser.y:867 { yyVAL.float = -yyDollar[2].float } case 219: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:870 +//line promql/parser/generated_parser.y:873 { var err error yyVAL.uint, err = strconv.ParseUint(yyDollar[1].item.Val, 10, 64) @@ -1927,19 +1930,19 @@ yydefault: } case 220: yyDollar = yyS[yypt-2 : yypt+1] -//line promql/parser/generated_parser.y:879 +//line promql/parser/generated_parser.y:882 { yyVAL.int = -int64(yyDollar[2].uint) } case 221: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:880 +//line promql/parser/generated_parser.y:883 { yyVAL.int = int64(yyDollar[1].uint) } case 222: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:884 +//line promql/parser/generated_parser.y:887 { var err error yyVAL.duration, err = parseDuration(yyDollar[1].item.Val) @@ -1949,7 +1952,7 @@ yydefault: } case 223: yyDollar = yyS[yypt-1 : yypt+1] -//line promql/parser/generated_parser.y:895 +//line promql/parser/generated_parser.y:898 { yyVAL.node = &StringLiteral{ Val: yylex.(*parser).unquoteString(yyDollar[1].item.Val), @@ -1958,13 +1961,13 @@ yydefault: } case 224: yyDollar = yyS[yypt-0 : yypt+1] -//line promql/parser/generated_parser.y:908 +//line promql/parser/generated_parser.y:911 { yyVAL.duration = 0 } case 226: yyDollar = yyS[yypt-0 : yypt+1] -//line promql/parser/generated_parser.y:912 +//line promql/parser/generated_parser.y:915 { yyVAL.strings = nil } From e3041740e4d3b9cd9eda17a737e9e99102f24331 Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Tue, 14 Nov 2023 19:04:30 +0100 Subject: [PATCH 50/66] tsdb/fileutil: use Go standard errors Signed-off-by: Matthieu MOREL --- tsdb/fileutil/mmap.go | 9 ++++----- tsdb/fileutil/preallocate_linux.go | 9 +++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tsdb/fileutil/mmap.go b/tsdb/fileutil/mmap.go index 4dbca4f974..782ff27ec9 100644 --- 
a/tsdb/fileutil/mmap.go +++ b/tsdb/fileutil/mmap.go @@ -14,9 +14,8 @@ package fileutil import ( + "fmt" "os" - - "github.com/pkg/errors" ) type MmapFile struct { @@ -31,7 +30,7 @@ func OpenMmapFile(path string) (*MmapFile, error) { func OpenMmapFileWithSize(path string, size int) (mf *MmapFile, retErr error) { f, err := os.Open(path) if err != nil { - return nil, errors.Wrap(err, "try lock file") + return nil, fmt.Errorf("try lock file: %w", err) } defer func() { if retErr != nil { @@ -41,14 +40,14 @@ func OpenMmapFileWithSize(path string, size int) (mf *MmapFile, retErr error) { if size <= 0 { info, err := f.Stat() if err != nil { - return nil, errors.Wrap(err, "stat") + return nil, fmt.Errorf("stat: %w", err) } size = int(info.Size()) } b, err := mmap(f, size) if err != nil { - return nil, errors.Wrapf(err, "mmap, size %d", size) + return nil, fmt.Errorf("mmap, size %d: %w", size, err) } return &MmapFile{f: f, b: b}, nil diff --git a/tsdb/fileutil/preallocate_linux.go b/tsdb/fileutil/preallocate_linux.go index ada0462213..026c69b354 100644 --- a/tsdb/fileutil/preallocate_linux.go +++ b/tsdb/fileutil/preallocate_linux.go @@ -15,6 +15,7 @@ package fileutil import ( + "errors" "os" "syscall" ) @@ -23,10 +24,10 @@ func preallocExtend(f *os.File, sizeInBytes int64) error { // use mode = 0 to change size err := syscall.Fallocate(int(f.Fd()), 0, 0, sizeInBytes) if err != nil { - errno, ok := err.(syscall.Errno) + var errno syscall.Errno // not supported; fallback // fallocate EINTRs frequently in some environments; fallback - if ok && (errno == syscall.ENOTSUP || errno == syscall.EINTR) { + if errors.As(err, &errno) && (errno == syscall.ENOTSUP || errno == syscall.EINTR) { return preallocExtendTrunc(f, sizeInBytes) } } @@ -37,9 +38,9 @@ func preallocFixed(f *os.File, sizeInBytes int64) error { // use mode = 1 to keep size; see FALLOC_FL_KEEP_SIZE err := syscall.Fallocate(int(f.Fd()), 1, 0, sizeInBytes) if err != nil { - errno, ok := err.(syscall.Errno) + var errno 
syscall.Errno // treat not supported as nil error - if ok && errno == syscall.ENOTSUP { + if errors.As(err, &errno) && errno == syscall.ENOTSUP { return nil } } From e60a508dd8286a707e1db64a3327de61f2c955c4 Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Tue, 14 Nov 2023 18:48:53 +0100 Subject: [PATCH 51/66] tsdb/errors: fix errorlint linter Signed-off-by: Matthieu MOREL --- tsdb/errors/errors.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tsdb/errors/errors.go b/tsdb/errors/errors.go index 21449e8950..6a8e72f049 100644 --- a/tsdb/errors/errors.go +++ b/tsdb/errors/errors.go @@ -38,7 +38,8 @@ func (es *multiError) Add(errs ...error) { if err == nil { continue } - if merr, ok := err.(nonNilMultiError); ok { + var merr nonNilMultiError + if errors.As(err, &merr) { *es = append(*es, merr.errs...) continue } From d7c3bc4cb02586350ad347a6222cd0dcdd03a900 Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Tue, 14 Nov 2023 20:46:36 +0100 Subject: [PATCH 52/66] tsdb/tsdbutil: use Go standard errors Signed-off-by: Matthieu MOREL --- tsdb/tsdbutil/dir_locker.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsdb/tsdbutil/dir_locker.go b/tsdb/tsdbutil/dir_locker.go index 155f586415..fa939879ca 100644 --- a/tsdb/tsdbutil/dir_locker.go +++ b/tsdb/tsdbutil/dir_locker.go @@ -14,13 +14,13 @@ package tsdbutil import ( + "errors" "fmt" "os" "path/filepath" "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" @@ -83,7 +83,7 @@ func (l *DirLocker) Lock() error { lockf, _, err := fileutil.Flock(l.path) if err != nil { - return errors.Wrap(err, "lock DB directory") + return fmt.Errorf("lock DB directory: %w", err) } l.releaser = lockf return nil From a99f48cc9f984ba9cc3d831231d42e6ba7afe745 Mon Sep 17 00:00:00 2001 From: Goutham Date: Wed, 15 Nov 2023 15:09:15 +0100 Subject: [PATCH 53/66] Bump OTel 
Collector dependency to v0.88.0 I initially didn't copy the otlptranslator/prometheus folder because I assumed it wouldn't get changes. But it did. So this PR fixes that and updates the Collector version. Supersedes: https://github.com/prometheus/prometheus/pull/12809 Signed-off-by: Goutham --- go.mod | 1 + go.sum | 2 + .../prometheus/normalize_label.go | 22 ++- .../prometheus/normalize_label_test.go | 19 -- .../prometheus/normalize_name.go | 67 +++++-- .../prometheus/normalize_name_test.go | 180 ------------------ .../prometheus/testutils_test.go | 34 ---- .../otlptranslator/prometheus/unit_to_ucum.go | 90 +++++++++ .../prometheusremotewrite/helper.go | 137 +++++++------ .../prometheusremotewrite/histograms.go | 82 ++++++-- .../prometheusremotewrite/metrics_to_prw.go | 4 +- .../number_data_points.go | 4 +- storage/remote/otlptranslator/update-copy.sh | 5 +- 13 files changed, 308 insertions(+), 339 deletions(-) delete mode 100644 storage/remote/otlptranslator/prometheus/normalize_label_test.go delete mode 100644 storage/remote/otlptranslator/prometheus/normalize_name_test.go delete mode 100644 storage/remote/otlptranslator/prometheus/testutils_test.go create mode 100644 storage/remote/otlptranslator/prometheus/unit_to_ucum.go diff --git a/go.mod b/go.mod index 44cf7c4b2e..d1396dce60 100644 --- a/go.mod +++ b/go.mod @@ -55,6 +55,7 @@ require ( github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c github.com/stretchr/testify v1.8.4 github.com/vultr/govultr/v2 v2.17.2 + go.opentelemetry.io/collector/featuregate v0.77.0 go.opentelemetry.io/collector/pdata v1.0.0-rcv0017 go.opentelemetry.io/collector/semconv v0.88.0 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 diff --git a/go.sum b/go.sum index c4658fbdbe..46a1dd1f6c 100644 --- a/go.sum +++ b/go.sum @@ -760,6 +760,8 @@ go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= 
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.opentelemetry.io/collector/featuregate v0.77.0 h1:m1/IzaXoQh6SgF6CM80vrBOCf5zSJ2GVISfA27fYzGU= +go.opentelemetry.io/collector/featuregate v0.77.0/go.mod h1:/kVAsGUCyJXIDSgHftCN63QiwAEVHRLX2Kh/S+dqgHY= go.opentelemetry.io/collector/pdata v1.0.0-rcv0017 h1:AgALhc2VenoA5l1DvTdg7mkzaBGqoTSuMkAtjsttBFo= go.opentelemetry.io/collector/pdata v1.0.0-rcv0017/go.mod h1:Rv9fOclA5AtM/JGm0d4jBOIAo1+jBA13UT5Bx0ovXi4= go.opentelemetry.io/collector/semconv v0.88.0 h1:8TVP4hYaUC87S6CCLKNoSxsUE0ChldE4vqotvNHHUnE= diff --git a/storage/remote/otlptranslator/prometheus/normalize_label.go b/storage/remote/otlptranslator/prometheus/normalize_label.go index 9f37c0af23..af0960e862 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label.go @@ -1,21 +1,31 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package normalize +package prometheus // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus" import ( "strings" "unicode" + + "go.opentelemetry.io/collector/featuregate" ) -// Normalizes the specified label to follow Prometheus label names standard. +var dropSanitizationGate = featuregate.GlobalRegistry().MustRegister( + "pkg.translator.prometheus.PermissiveLabelSanitization", + featuregate.StageAlpha, + featuregate.WithRegisterDescription("Controls whether to change labels starting with '_' to 'key_'."), + featuregate.WithRegisterReferenceURL("https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/8950"), +) + +// Normalizes the specified label to follow Prometheus label names standard // // See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels // -// Labels that start with non-letter rune will be prefixed with "key_". 
+// Labels that start with non-letter rune will be prefixed with "key_" // -// Exception is made for double-underscores which are allowed. +// Exception is made for double-underscores which are allowed func NormalizeLabel(label string) string { + // Trivial case if len(label) == 0 { return label @@ -27,12 +37,14 @@ func NormalizeLabel(label string) string { // If label starts with a number, prepend with "key_" if unicode.IsDigit(rune(label[0])) { label = "key_" + label + } else if strings.HasPrefix(label, "_") && !strings.HasPrefix(label, "__") && !dropSanitizationGate.IsEnabled() { + label = "key" + label } return label } -// Return '_' for anything non-alphanumeric. +// Return '_' for anything non-alphanumeric func sanitizeRune(r rune) rune { if unicode.IsLetter(r) || unicode.IsDigit(r) { return r diff --git a/storage/remote/otlptranslator/prometheus/normalize_label_test.go b/storage/remote/otlptranslator/prometheus/normalize_label_test.go deleted file mode 100644 index 7346b20f9b..0000000000 --- a/storage/remote/otlptranslator/prometheus/normalize_label_test.go +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package normalize - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func TestSanitizeDropSanitization(t *testing.T) { - require.Equal(t, "", NormalizeLabel("")) - require.Equal(t, "_test", NormalizeLabel("_test")) - require.Equal(t, "key_0test", NormalizeLabel("0test")) - require.Equal(t, "test", NormalizeLabel("test")) - require.Equal(t, "test__", NormalizeLabel("test_/")) - require.Equal(t, "__test", NormalizeLabel("__test")) -} diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index b57e5a0575..72fc04cea2 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -1,21 +1,23 @@ // Copyright The OpenTelemetry 
Authors // SPDX-License-Identifier: Apache-2.0 -package normalize +package prometheus // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus" import ( "strings" "unicode" + "go.opentelemetry.io/collector/featuregate" "go.opentelemetry.io/collector/pdata/pmetric" ) -// The map to translate OTLP units to Prometheus units. +// The map to translate OTLP units to Prometheus units // OTLP metrics use the c/s notation as specified at https://ucum.org/ucum.html // (See also https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/semantic_conventions/README.md#instrument-units) // Prometheus best practices for units: https://prometheus.io/docs/practices/naming/#base-units // OpenMetrics specification for units: https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#units-and-base-units var unitMap = map[string]string{ + // Time "d": "days", "h": "hours", @@ -35,11 +37,6 @@ var unitMap = map[string]string{ "MBy": "megabytes", "GBy": "gigabytes", "TBy": "terabytes", - "B": "bytes", - "KB": "kilobytes", - "MB": "megabytes", - "GB": "gigabytes", - "TB": "terabytes", // SI "m": "meters", @@ -54,11 +51,10 @@ var unitMap = map[string]string{ "Hz": "hertz", "1": "", "%": "percent", - "$": "dollars", } -// The map that translates the "per" unit. -// Example: s => per second (singular). +// The map that translates the "per" unit +// Example: s => per second (singular) var perUnitMap = map[string]string{ "s": "second", "m": "minute", @@ -69,7 +65,14 @@ var perUnitMap = map[string]string{ "y": "year", } -// Build a Prometheus-compliant metric name for the specified metric. 
+var normalizeNameGate = featuregate.GlobalRegistry().MustRegister( + "pkg.translator.prometheus.NormalizeName", + featuregate.StageBeta, + featuregate.WithRegisterDescription("Controls whether metrics names are automatically normalized to follow Prometheus naming convention"), + featuregate.WithRegisterReferenceURL("https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/8950"), +) + +// BuildCompliantName builds a Prometheus-compliant metric name for the specified metric // // Metric name is prefixed with specified namespace and underscore (if any). // Namespace is not cleaned up. Make sure specified namespace follows Prometheus @@ -77,7 +80,33 @@ var perUnitMap = map[string]string{ // // See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels // and https://prometheus.io/docs/practices/naming/#metric-and-label-naming -func BuildPromCompliantName(metric pmetric.Metric, namespace string) string { +func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffixes bool) string { + var metricName string + + // Full normalization following standard Prometheus naming conventions + if addMetricSuffixes && normalizeNameGate.IsEnabled() { + return normalizeName(metric, namespace) + } + + // Simple case (no full normalization, no units, etc.), we simply trim out forbidden chars + metricName = RemovePromForbiddenRunes(metric.Name()) + + // Namespace? + if namespace != "" { + return namespace + "_" + metricName + } + + // Metric name starts with a digit? 
Prefix it with an underscore + if metricName != "" && unicode.IsDigit(rune(metricName[0])) { + metricName = "_" + metricName + } + + return metricName +} + +// Build a normalized name for the specified metric +func normalizeName(metric pmetric.Metric, namespace string) string { + // Split metric name in "tokens" (remove all non-alphanumeric) nameTokens := strings.FieldsFunc( metric.Name(), @@ -202,7 +231,7 @@ func removeSuffix(tokens []string, suffix string) []string { return tokens } -// Clean up specified string so it's Prometheus compliant. +// Clean up specified string so it's Prometheus compliant func CleanUpString(s string) string { return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) }), "_") } @@ -211,8 +240,8 @@ func RemovePromForbiddenRunes(s string) string { return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' && r != ':' }), "_") } -// Retrieve the Prometheus "basic" unit corresponding to the specified "basic" unit. -// Returns the specified unit if not found in unitMap. +// Retrieve the Prometheus "basic" unit corresponding to the specified "basic" unit +// Returns the specified unit if not found in unitMap func unitMapGetOrDefault(unit string) string { if promUnit, ok := unitMap[unit]; ok { return promUnit @@ -220,8 +249,8 @@ func unitMapGetOrDefault(unit string) string { return unit } -// Retrieve the Prometheus "per" unit corresponding to the specified "per" unit. -// Returns the specified unit if not found in perUnitMap. 
+// Retrieve the Prometheus "per" unit corresponding to the specified "per" unit +// Returns the specified unit if not found in perUnitMap func perUnitMapGetOrDefault(perUnit string) string { if promPerUnit, ok := perUnitMap[perUnit]; ok { return promPerUnit @@ -229,7 +258,7 @@ func perUnitMapGetOrDefault(perUnit string) string { return perUnit } -// Returns whether the slice contains the specified value. +// Returns whether the slice contains the specified value func contains(slice []string, value string) bool { for _, sliceEntry := range slice { if sliceEntry == value { @@ -239,7 +268,7 @@ func contains(slice []string, value string) bool { return false } -// Remove the specified value from the slice. +// Remove the specified value from the slice func removeItem(slice []string, value string) []string { newSlice := make([]string, 0, len(slice)) for _, sliceEntry := range slice { diff --git a/storage/remote/otlptranslator/prometheus/normalize_name_test.go b/storage/remote/otlptranslator/prometheus/normalize_name_test.go deleted file mode 100644 index 33910636a4..0000000000 --- a/storage/remote/otlptranslator/prometheus/normalize_name_test.go +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package normalize - -import ( - "testing" - - "github.com/stretchr/testify/require" - "go.opentelemetry.io/collector/pdata/pmetric" -) - -func TestByte(t *testing.T) { - require.Equal(t, "system_filesystem_usage_bytes", BuildPromCompliantName(createGauge("system.filesystem.usage", "By"), "")) -} - -func TestByteCounter(t *testing.T) { - require.Equal(t, "system_io_bytes_total", BuildPromCompliantName(createCounter("system.io", "By"), "")) - require.Equal(t, "network_transmitted_bytes_total", BuildPromCompliantName(createCounter("network_transmitted_bytes_total", "By"), "")) -} - -func TestWhiteSpaces(t *testing.T) { - require.Equal(t, "system_filesystem_usage_bytes", BuildPromCompliantName(createGauge("\t 
system.filesystem.usage ", " By\t"), "")) -} - -func TestNonStandardUnit(t *testing.T) { - require.Equal(t, "system_network_dropped", BuildPromCompliantName(createGauge("system.network.dropped", "{packets}"), "")) -} - -func TestNonStandardUnitCounter(t *testing.T) { - require.Equal(t, "system_network_dropped_total", BuildPromCompliantName(createCounter("system.network.dropped", "{packets}"), "")) -} - -func TestBrokenUnit(t *testing.T) { - require.Equal(t, "system_network_dropped_packets", BuildPromCompliantName(createGauge("system.network.dropped", "packets"), "")) - require.Equal(t, "system_network_packets_dropped", BuildPromCompliantName(createGauge("system.network.packets.dropped", "packets"), "")) - require.Equal(t, "system_network_packets", BuildPromCompliantName(createGauge("system.network.packets", "packets"), "")) -} - -func TestBrokenUnitCounter(t *testing.T) { - require.Equal(t, "system_network_dropped_packets_total", BuildPromCompliantName(createCounter("system.network.dropped", "packets"), "")) - require.Equal(t, "system_network_packets_dropped_total", BuildPromCompliantName(createCounter("system.network.packets.dropped", "packets"), "")) - require.Equal(t, "system_network_packets_total", BuildPromCompliantName(createCounter("system.network.packets", "packets"), "")) -} - -func TestRatio(t *testing.T) { - require.Equal(t, "hw_gpu_memory_utilization_ratio", BuildPromCompliantName(createGauge("hw.gpu.memory.utilization", "1"), "")) - require.Equal(t, "hw_fan_speed_ratio", BuildPromCompliantName(createGauge("hw.fan.speed_ratio", "1"), "")) - require.Equal(t, "objects_total", BuildPromCompliantName(createCounter("objects", "1"), "")) -} - -func TestHertz(t *testing.T) { - require.Equal(t, "hw_cpu_speed_limit_hertz", BuildPromCompliantName(createGauge("hw.cpu.speed_limit", "Hz"), "")) -} - -func TestPer(t *testing.T) { - require.Equal(t, "broken_metric_speed_km_per_hour", BuildPromCompliantName(createGauge("broken.metric.speed", "km/h"), "")) - 
require.Equal(t, "astro_light_speed_limit_meters_per_second", BuildPromCompliantName(createGauge("astro.light.speed_limit", "m/s"), "")) -} - -func TestPercent(t *testing.T) { - require.Equal(t, "broken_metric_success_ratio_percent", BuildPromCompliantName(createGauge("broken.metric.success_ratio", "%"), "")) - require.Equal(t, "broken_metric_success_percent", BuildPromCompliantName(createGauge("broken.metric.success_percent", "%"), "")) -} - -func TestDollar(t *testing.T) { - require.Equal(t, "crypto_bitcoin_value_dollars", BuildPromCompliantName(createGauge("crypto.bitcoin.value", "$"), "")) - require.Equal(t, "crypto_bitcoin_value_dollars", BuildPromCompliantName(createGauge("crypto.bitcoin.value.dollars", "$"), "")) -} - -func TestEmpty(t *testing.T) { - require.Equal(t, "test_metric_no_unit", BuildPromCompliantName(createGauge("test.metric.no_unit", ""), "")) - require.Equal(t, "test_metric_spaces", BuildPromCompliantName(createGauge("test.metric.spaces", " \t "), "")) -} - -func TestUnsupportedRunes(t *testing.T) { - require.Equal(t, "unsupported_metric_temperature_F", BuildPromCompliantName(createGauge("unsupported.metric.temperature", "°F"), "")) - require.Equal(t, "unsupported_metric_weird", BuildPromCompliantName(createGauge("unsupported.metric.weird", "+=.:,!* & #"), "")) - require.Equal(t, "unsupported_metric_redundant_test_per_C", BuildPromCompliantName(createGauge("unsupported.metric.redundant", "__test $/°C"), "")) -} - -func TestOtelReceivers(t *testing.T) { - require.Equal(t, "active_directory_ds_replication_network_io_bytes_total", BuildPromCompliantName(createCounter("active_directory.ds.replication.network.io", "By"), "")) - require.Equal(t, "active_directory_ds_replication_sync_object_pending_total", BuildPromCompliantName(createCounter("active_directory.ds.replication.sync.object.pending", "{objects}"), "")) - require.Equal(t, "active_directory_ds_replication_object_rate_per_second", 
BuildPromCompliantName(createGauge("active_directory.ds.replication.object.rate", "{objects}/s"), "")) - require.Equal(t, "active_directory_ds_name_cache_hit_rate_percent", BuildPromCompliantName(createGauge("active_directory.ds.name_cache.hit_rate", "%"), "")) - require.Equal(t, "active_directory_ds_ldap_bind_last_successful_time_milliseconds", BuildPromCompliantName(createGauge("active_directory.ds.ldap.bind.last_successful.time", "ms"), "")) - require.Equal(t, "apache_current_connections", BuildPromCompliantName(createGauge("apache.current_connections", "connections"), "")) - require.Equal(t, "apache_workers_connections", BuildPromCompliantName(createGauge("apache.workers", "connections"), "")) - require.Equal(t, "apache_requests_total", BuildPromCompliantName(createCounter("apache.requests", "1"), "")) - require.Equal(t, "bigip_virtual_server_request_count_total", BuildPromCompliantName(createCounter("bigip.virtual_server.request.count", "{requests}"), "")) - require.Equal(t, "system_cpu_utilization_ratio", BuildPromCompliantName(createGauge("system.cpu.utilization", "1"), "")) - require.Equal(t, "system_disk_operation_time_seconds_total", BuildPromCompliantName(createCounter("system.disk.operation_time", "s"), "")) - require.Equal(t, "system_cpu_load_average_15m_ratio", BuildPromCompliantName(createGauge("system.cpu.load_average.15m", "1"), "")) - require.Equal(t, "memcached_operation_hit_ratio_percent", BuildPromCompliantName(createGauge("memcached.operation_hit_ratio", "%"), "")) - require.Equal(t, "mongodbatlas_process_asserts_per_second", BuildPromCompliantName(createGauge("mongodbatlas.process.asserts", "{assertions}/s"), "")) - require.Equal(t, "mongodbatlas_process_journaling_data_files_mebibytes", BuildPromCompliantName(createGauge("mongodbatlas.process.journaling.data_files", "MiBy"), "")) - require.Equal(t, "mongodbatlas_process_network_io_bytes_per_second", BuildPromCompliantName(createGauge("mongodbatlas.process.network.io", "By/s"), "")) - 
require.Equal(t, "mongodbatlas_process_oplog_rate_gibibytes_per_hour", BuildPromCompliantName(createGauge("mongodbatlas.process.oplog.rate", "GiBy/h"), "")) - require.Equal(t, "mongodbatlas_process_db_query_targeting_scanned_per_returned", BuildPromCompliantName(createGauge("mongodbatlas.process.db.query_targeting.scanned_per_returned", "{scanned}/{returned}"), "")) - require.Equal(t, "nginx_requests", BuildPromCompliantName(createGauge("nginx.requests", "requests"), "")) - require.Equal(t, "nginx_connections_accepted", BuildPromCompliantName(createGauge("nginx.connections_accepted", "connections"), "")) - require.Equal(t, "nsxt_node_memory_usage_kilobytes", BuildPromCompliantName(createGauge("nsxt.node.memory.usage", "KBy"), "")) - require.Equal(t, "redis_latest_fork_microseconds", BuildPromCompliantName(createGauge("redis.latest_fork", "us"), "")) -} - -func TestTrimPromSuffixes(t *testing.T) { - require.Equal(t, "active_directory_ds_replication_network_io", TrimPromSuffixes("active_directory_ds_replication_network_io_bytes_total", pmetric.MetricTypeSum, "bytes")) - require.Equal(t, "active_directory_ds_name_cache_hit_rate", TrimPromSuffixes("active_directory_ds_name_cache_hit_rate_percent", pmetric.MetricTypeGauge, "percent")) - require.Equal(t, "active_directory_ds_ldap_bind_last_successful_time", TrimPromSuffixes("active_directory_ds_ldap_bind_last_successful_time_milliseconds", pmetric.MetricTypeGauge, "milliseconds")) - require.Equal(t, "apache_requests", TrimPromSuffixes("apache_requests_total", pmetric.MetricTypeSum, "1")) - require.Equal(t, "system_cpu_utilization", TrimPromSuffixes("system_cpu_utilization_ratio", pmetric.MetricTypeGauge, "ratio")) - require.Equal(t, "mongodbatlas_process_journaling_data_files", TrimPromSuffixes("mongodbatlas_process_journaling_data_files_mebibytes", pmetric.MetricTypeGauge, "mebibytes")) - require.Equal(t, "mongodbatlas_process_network_io", TrimPromSuffixes("mongodbatlas_process_network_io_bytes_per_second", 
pmetric.MetricTypeGauge, "bytes_per_second")) - require.Equal(t, "mongodbatlas_process_oplog_rate", TrimPromSuffixes("mongodbatlas_process_oplog_rate_gibibytes_per_hour", pmetric.MetricTypeGauge, "gibibytes_per_hour")) - require.Equal(t, "nsxt_node_memory_usage", TrimPromSuffixes("nsxt_node_memory_usage_kilobytes", pmetric.MetricTypeGauge, "kilobytes")) - require.Equal(t, "redis_latest_fork", TrimPromSuffixes("redis_latest_fork_microseconds", pmetric.MetricTypeGauge, "microseconds")) - require.Equal(t, "up", TrimPromSuffixes("up", pmetric.MetricTypeGauge, "")) - - // These are not necessarily valid OM units, only tested for the sake of completeness. - require.Equal(t, "active_directory_ds_replication_sync_object_pending", TrimPromSuffixes("active_directory_ds_replication_sync_object_pending_total", pmetric.MetricTypeSum, "{objects}")) - require.Equal(t, "apache_current", TrimPromSuffixes("apache_current_connections", pmetric.MetricTypeGauge, "connections")) - require.Equal(t, "bigip_virtual_server_request_count", TrimPromSuffixes("bigip_virtual_server_request_count_total", pmetric.MetricTypeSum, "{requests}")) - require.Equal(t, "mongodbatlas_process_db_query_targeting_scanned_per_returned", TrimPromSuffixes("mongodbatlas_process_db_query_targeting_scanned_per_returned", pmetric.MetricTypeGauge, "{scanned}/{returned}")) - require.Equal(t, "nginx_connections_accepted", TrimPromSuffixes("nginx_connections_accepted", pmetric.MetricTypeGauge, "connections")) - require.Equal(t, "apache_workers", TrimPromSuffixes("apache_workers_connections", pmetric.MetricTypeGauge, "connections")) - require.Equal(t, "nginx", TrimPromSuffixes("nginx_requests", pmetric.MetricTypeGauge, "requests")) - - // Units shouldn't be trimmed if the unit is not a direct match with the suffix, i.e, a suffix "_seconds" shouldn't be removed if unit is "sec" or "s" - require.Equal(t, "system_cpu_load_average_15m_ratio", TrimPromSuffixes("system_cpu_load_average_15m_ratio", pmetric.MetricTypeGauge, 
"1")) - require.Equal(t, "mongodbatlas_process_asserts_per_second", TrimPromSuffixes("mongodbatlas_process_asserts_per_second", pmetric.MetricTypeGauge, "{assertions}/s")) - require.Equal(t, "memcached_operation_hit_ratio_percent", TrimPromSuffixes("memcached_operation_hit_ratio_percent", pmetric.MetricTypeGauge, "%")) - require.Equal(t, "active_directory_ds_replication_object_rate_per_second", TrimPromSuffixes("active_directory_ds_replication_object_rate_per_second", pmetric.MetricTypeGauge, "{objects}/s")) - require.Equal(t, "system_disk_operation_time_seconds", TrimPromSuffixes("system_disk_operation_time_seconds_total", pmetric.MetricTypeSum, "s")) -} - -func TestNamespace(t *testing.T) { - require.Equal(t, "space_test", BuildPromCompliantName(createGauge("test", ""), "space")) - require.Equal(t, "space_test", BuildPromCompliantName(createGauge("#test", ""), "space")) -} - -func TestCleanUpString(t *testing.T) { - require.Equal(t, "", CleanUpString("")) - require.Equal(t, "a_b", CleanUpString("a b")) - require.Equal(t, "hello_world", CleanUpString("hello, world!")) - require.Equal(t, "hello_you_2", CleanUpString("hello you 2")) - require.Equal(t, "1000", CleanUpString("$1000")) - require.Equal(t, "", CleanUpString("*+$^=)")) -} - -func TestUnitMapGetOrDefault(t *testing.T) { - require.Equal(t, "", unitMapGetOrDefault("")) - require.Equal(t, "seconds", unitMapGetOrDefault("s")) - require.Equal(t, "invalid", unitMapGetOrDefault("invalid")) -} - -func TestPerUnitMapGetOrDefault(t *testing.T) { - require.Equal(t, "", perUnitMapGetOrDefault("")) - require.Equal(t, "second", perUnitMapGetOrDefault("s")) - require.Equal(t, "invalid", perUnitMapGetOrDefault("invalid")) -} - -func TestRemoveItem(t *testing.T) { - require.Equal(t, []string{}, removeItem([]string{}, "test")) - require.Equal(t, []string{}, removeItem([]string{}, "")) - require.Equal(t, []string{"a", "b", "c"}, removeItem([]string{"a", "b", "c"}, "d")) - require.Equal(t, []string{"a", "b", "c"}, 
removeItem([]string{"a", "b", "c"}, "")) - require.Equal(t, []string{"a", "b"}, removeItem([]string{"a", "b", "c"}, "c")) - require.Equal(t, []string{"a", "c"}, removeItem([]string{"a", "b", "c"}, "b")) - require.Equal(t, []string{"b", "c"}, removeItem([]string{"a", "b", "c"}, "a")) -} - -func TestBuildPromCompliantName(t *testing.T) { - require.Equal(t, "system_io_bytes_total", BuildPromCompliantName(createCounter("system.io", "By"), "")) - require.Equal(t, "system_network_io_bytes_total", BuildPromCompliantName(createCounter("network.io", "By"), "system")) - require.Equal(t, "_3_14_digits", BuildPromCompliantName(createGauge("3.14 digits", ""), "")) - require.Equal(t, "envoy_rule_engine_zlib_buf_error", BuildPromCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "")) - require.Equal(t, "foo_bar", BuildPromCompliantName(createGauge(":foo::bar", ""), "")) - require.Equal(t, "foo_bar_total", BuildPromCompliantName(createCounter(":foo::bar", ""), "")) -} diff --git a/storage/remote/otlptranslator/prometheus/testutils_test.go b/storage/remote/otlptranslator/prometheus/testutils_test.go deleted file mode 100644 index dc4983bf59..0000000000 --- a/storage/remote/otlptranslator/prometheus/testutils_test.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package normalize - -import ( - "go.opentelemetry.io/collector/pdata/pmetric" -) - -var ilm pmetric.ScopeMetrics - -func init() { - metrics := pmetric.NewMetrics() - resourceMetrics := metrics.ResourceMetrics().AppendEmpty() - ilm = resourceMetrics.ScopeMetrics().AppendEmpty() -} - -// Returns a new Metric of type "Gauge" with specified name and unit. -func createGauge(name, unit string) pmetric.Metric { - gauge := ilm.Metrics().AppendEmpty() - gauge.SetName(name) - gauge.SetUnit(unit) - gauge.SetEmptyGauge() - return gauge -} - -// Returns a new Metric of type Monotonic Sum with specified name and unit. 
-func createCounter(name, unit string) pmetric.Metric { - counter := ilm.Metrics().AppendEmpty() - counter.SetEmptySum().SetIsMonotonic(true) - counter.SetName(name) - counter.SetUnit(unit) - return counter -} diff --git a/storage/remote/otlptranslator/prometheus/unit_to_ucum.go b/storage/remote/otlptranslator/prometheus/unit_to_ucum.go new file mode 100644 index 0000000000..b2f2c4f3aa --- /dev/null +++ b/storage/remote/otlptranslator/prometheus/unit_to_ucum.go @@ -0,0 +1,90 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package prometheus // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus" + +import "strings" + +var wordToUCUM = map[string]string{ + + // Time + "days": "d", + "hours": "h", + "minutes": "min", + "seconds": "s", + "milliseconds": "ms", + "microseconds": "us", + "nanoseconds": "ns", + + // Bytes + "bytes": "By", + "kibibytes": "KiBy", + "mebibytes": "MiBy", + "gibibytes": "GiBy", + "tibibytes": "TiBy", + "kilobytes": "KBy", + "megabytes": "MBy", + "gigabytes": "GBy", + "terabytes": "TBy", + + // SI + "meters": "m", + "volts": "V", + "amperes": "A", + "joules": "J", + "watts": "W", + "grams": "g", + + // Misc + "celsius": "Cel", + "hertz": "Hz", + "ratio": "1", + "percent": "%", +} + +// The map that translates the "per" unit +// Example: per_second (singular) => /s +var perWordToUCUM = map[string]string{ + "second": "s", + "minute": "m", + "hour": "h", + "day": "d", + "week": "w", + "month": "mo", + "year": "y", +} + +// UnitWordToUCUM converts english unit words to UCUM units: +// https://ucum.org/ucum#section-Alphabetic-Index-By-Symbol +// It also handles rates, such as meters_per_second, by translating the first +// word to UCUM, and the "per" word to UCUM. It joins them with a "/" between. 
+func UnitWordToUCUM(unit string) string { + unitTokens := strings.SplitN(unit, "_per_", 2) + if len(unitTokens) == 0 { + return "" + } + ucumUnit := wordToUCUMOrDefault(unitTokens[0]) + if len(unitTokens) > 1 && unitTokens[1] != "" { + ucumUnit += "/" + perWordToUCUMOrDefault(unitTokens[1]) + } + return ucumUnit +} + +// wordToUCUMOrDefault retrieves the Prometheus "basic" unit corresponding to +// the specified "basic" unit. Returns the specified unit if not found in +// wordToUCUM. +func wordToUCUMOrDefault(unit string) string { + if promUnit, ok := wordToUCUM[unit]; ok { + return promUnit + } + return unit +} + +// perWordToUCUMOrDefault retrieve the Prometheus "per" unit corresponding to +// the specified "per" unit. Returns the specified unit if not found in perWordToUCUM. +func perWordToUCUMOrDefault(perUnit string) string { + if promPerUnit, ok := perWordToUCUM[perUnit]; ok { + return promPerUnit + } + return perUnit +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index 6080686e76..49ad5672b3 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -71,8 +71,8 @@ func (a ByLabelName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } // creates a new TimeSeries in the map if not found and returns the time series signature. // tsMap will be unmodified if either labels or sample is nil, but can still be modified if the exemplar is nil. 
func addSample(tsMap map[string]*prompb.TimeSeries, sample *prompb.Sample, labels []prompb.Label, - datatype string, -) string { + datatype string) string { + if sample == nil || labels == nil || tsMap == nil { return "" } @@ -132,7 +132,14 @@ func addExemplar(tsMap map[string]*prompb.TimeSeries, bucketBounds []bucketBound // the label slice should not contain duplicate label names; this method sorts the slice by label name before creating // the signature. func timeSeriesSignature(datatype string, labels *[]prompb.Label) string { + length := len(datatype) + + for _, lb := range *labels { + length += 2 + len(lb.GetName()) + len(lb.GetValue()) + } + b := strings.Builder{} + b.Grow(length) b.WriteString(datatype) sort.Sort(ByLabelName(*labels)) @@ -151,8 +158,22 @@ func timeSeriesSignature(datatype string, labels *[]prompb.Label) string { // Unpaired string value is ignored. String pairs overwrites OTLP labels if collision happens, and the overwrite is // logged. Resultant label names are sanitized. func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externalLabels map[string]string, extras ...string) []prompb.Label { + serviceName, haveServiceName := resource.Attributes().Get(conventions.AttributeServiceName) + instance, haveInstanceID := resource.Attributes().Get(conventions.AttributeServiceInstanceID) + + // Calculate the maximum possible number of labels we could return so we can preallocate l + maxLabelCount := attributes.Len() + len(externalLabels) + len(extras)/2 + + if haveServiceName { + maxLabelCount++ + } + + if haveInstanceID { + maxLabelCount++ + } + // map ensures no duplicate label name - l := map[string]prompb.Label{} + l := make(map[string]string, maxLabelCount) // Ensure attributes are sorted by key for consistent merging of keys which // collide when sanitized. 
@@ -164,35 +185,25 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa sort.Stable(ByLabelName(labels)) for _, label := range labels { - finalKey := prometheustranslator.NormalizeLabel(label.Name) + var finalKey = prometheustranslator.NormalizeLabel(label.Name) if existingLabel, alreadyExists := l[finalKey]; alreadyExists { - existingLabel.Value = existingLabel.Value + ";" + label.Value - l[finalKey] = existingLabel + l[finalKey] = existingLabel + ";" + label.Value } else { - l[finalKey] = prompb.Label{ - Name: finalKey, - Value: label.Value, - } + l[finalKey] = label.Value } } // Map service.name + service.namespace to job - if serviceName, ok := resource.Attributes().Get(conventions.AttributeServiceName); ok { + if haveServiceName { val := serviceName.AsString() if serviceNamespace, ok := resource.Attributes().Get(conventions.AttributeServiceNamespace); ok { val = fmt.Sprintf("%s/%s", serviceNamespace.AsString(), val) } - l[model.JobLabel] = prompb.Label{ - Name: model.JobLabel, - Value: val, - } + l[model.JobLabel] = val } // Map service.instance.id to instance - if instance, ok := resource.Attributes().Get(conventions.AttributeServiceInstanceID); ok { - l[model.InstanceLabel] = prompb.Label{ - Name: model.InstanceLabel, - Value: instance.AsString(), - } + if haveInstanceID { + l[model.InstanceLabel] = instance.AsString() } for key, value := range externalLabels { // External labels have already been sanitized @@ -200,10 +211,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa // Skip external labels if they are overridden by metric attributes continue } - l[key] = prompb.Label{ - Name: key, - Value: value, - } + l[key] = value } for i := 0; i < len(extras); i += 2 { @@ -219,15 +227,12 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa if !(len(name) > 4 && name[:2] == "__" && name[len(name)-2:] == "__") { name = prometheustranslator.NormalizeLabel(name) } - 
l[name] = prompb.Label{ - Name: name, - Value: extras[i+1], - } + l[name] = extras[i+1] } s := make([]prompb.Label, 0, len(l)) - for _, lb := range l { - s = append(s, lb) + for k, v := range l { + s = append(s, prompb.Label{Name: k, Value: v}) } return s @@ -236,6 +241,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa // isValidAggregationTemporality checks whether an OTel metric has a valid // aggregation temporality for conversion to a Prometheus metric. func isValidAggregationTemporality(metric pmetric.Metric) bool { + //exhaustive:enforce switch metric.Type() { case pmetric.MetricTypeGauge, pmetric.MetricTypeSummary: return true @@ -254,7 +260,22 @@ func isValidAggregationTemporality(metric pmetric.Metric) bool { func addSingleHistogramDataPoint(pt pmetric.HistogramDataPoint, resource pcommon.Resource, metric pmetric.Metric, settings Settings, tsMap map[string]*prompb.TimeSeries) { timestamp := convertTimeStamp(pt.Timestamp()) // sum, count, and buckets of the histogram should append suffix to baseName - baseName := prometheustranslator.BuildPromCompliantName(metric, settings.Namespace) + baseName := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes) + baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels) + + createLabels := func(nameSuffix string, extras ...string) []prompb.Label { + extraLabelCount := len(extras) / 2 + labels := make([]prompb.Label, len(baseLabels), len(baseLabels)+extraLabelCount+1) // +1 for name + copy(labels, baseLabels) + + for extrasIdx := 0; extrasIdx < extraLabelCount; extrasIdx++ { + labels = append(labels, prompb.Label{Name: extras[extrasIdx], Value: extras[extrasIdx+1]}) + } + + labels = append(labels, prompb.Label{Name: nameStr, Value: baseName + nameSuffix}) + + return labels + } // If the sum is unset, it indicates the _sum metric point should be // omitted @@ -268,7 +289,7 @@ func addSingleHistogramDataPoint(pt 
pmetric.HistogramDataPoint, resource pcommon sum.Value = math.Float64frombits(value.StaleNaN) } - sumlabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nameStr, baseName+sumStr) + sumlabels := createLabels(sumStr) addSample(tsMap, sum, sumlabels, metric.Type().String()) } @@ -282,7 +303,7 @@ func addSingleHistogramDataPoint(pt pmetric.HistogramDataPoint, resource pcommon count.Value = math.Float64frombits(value.StaleNaN) } - countlabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nameStr, baseName+countStr) + countlabels := createLabels(countStr) addSample(tsMap, count, countlabels, metric.Type().String()) // cumulative count for conversion to cumulative histogram @@ -304,7 +325,7 @@ func addSingleHistogramDataPoint(pt pmetric.HistogramDataPoint, resource pcommon bucket.Value = math.Float64frombits(value.StaleNaN) } boundStr := strconv.FormatFloat(bound, 'f', -1, 64) - labels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nameStr, baseName+bucketStr, leStr, boundStr) + labels := createLabels(bucketStr, leStr, boundStr) sig := addSample(tsMap, bucket, labels, metric.Type().String()) bucketBounds = append(bucketBounds, bucketBoundsData{sig: sig, bound: bound}) @@ -318,7 +339,7 @@ func addSingleHistogramDataPoint(pt pmetric.HistogramDataPoint, resource pcommon } else { infBucket.Value = float64(pt.Count()) } - infLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nameStr, baseName+bucketStr, leStr, pInfStr) + infLabels := createLabels(bucketStr, leStr, pInfStr) sig := addSample(tsMap, infBucket, infLabels, metric.Type().String()) bucketBounds = append(bucketBounds, bucketBoundsData{sig: sig, bound: math.Inf(1)}) @@ -327,14 +348,8 @@ func addSingleHistogramDataPoint(pt pmetric.HistogramDataPoint, resource pcommon // add _created time series if needed startTimestamp := pt.StartTimestamp() if settings.ExportCreatedMetric && startTimestamp != 0 { - createdLabels := 
createAttributes( - resource, - pt.Attributes(), - settings.ExternalLabels, - nameStr, - baseName+createdSuffix, - ) - addCreatedTimeSeriesIfNeeded(tsMap, createdLabels, startTimestamp, metric.Type().String()) + labels := createLabels(createdSuffix) + addCreatedTimeSeriesIfNeeded(tsMap, labels, startTimestamp, metric.Type().String()) } } @@ -402,6 +417,7 @@ func getPromExemplars[T exemplarType](pt T) []prompb.Exemplar { func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp { var ts pcommon.Timestamp // handle individual metric based on type + //exhaustive:enforce switch metric.Type() { case pmetric.MetricTypeGauge: dataPoints := metric.Gauge().DataPoints() @@ -441,11 +457,26 @@ func maxTimestamp(a, b pcommon.Timestamp) pcommon.Timestamp { // addSingleSummaryDataPoint converts pt to len(QuantileValues) + 2 samples. func addSingleSummaryDataPoint(pt pmetric.SummaryDataPoint, resource pcommon.Resource, metric pmetric.Metric, settings Settings, - tsMap map[string]*prompb.TimeSeries, -) { + tsMap map[string]*prompb.TimeSeries) { timestamp := convertTimeStamp(pt.Timestamp()) // sum and count of the summary should append suffix to baseName - baseName := prometheustranslator.BuildPromCompliantName(metric, settings.Namespace) + baseName := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes) + baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels) + + createLabels := func(name string, extras ...string) []prompb.Label { + extraLabelCount := len(extras) / 2 + labels := make([]prompb.Label, len(baseLabels), len(baseLabels)+extraLabelCount+1) // +1 for name + copy(labels, baseLabels) + + for extrasIdx := 0; extrasIdx < extraLabelCount; extrasIdx++ { + labels = append(labels, prompb.Label{Name: extras[extrasIdx], Value: extras[extrasIdx+1]}) + } + + labels = append(labels, prompb.Label{Name: nameStr, Value: name}) + + return labels + } + // treat sum as a sample in an individual TimeSeries 
sum := &prompb.Sample{ Value: pt.Sum(), @@ -454,7 +485,7 @@ func addSingleSummaryDataPoint(pt pmetric.SummaryDataPoint, resource pcommon.Res if pt.Flags().NoRecordedValue() { sum.Value = math.Float64frombits(value.StaleNaN) } - sumlabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nameStr, baseName+sumStr) + sumlabels := createLabels(baseName + sumStr) addSample(tsMap, sum, sumlabels, metric.Type().String()) // treat count as a sample in an individual TimeSeries @@ -465,7 +496,7 @@ func addSingleSummaryDataPoint(pt pmetric.SummaryDataPoint, resource pcommon.Res if pt.Flags().NoRecordedValue() { count.Value = math.Float64frombits(value.StaleNaN) } - countlabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nameStr, baseName+countStr) + countlabels := createLabels(baseName + countStr) addSample(tsMap, count, countlabels, metric.Type().String()) // process each percentile/quantile @@ -479,20 +510,14 @@ func addSingleSummaryDataPoint(pt pmetric.SummaryDataPoint, resource pcommon.Res quantile.Value = math.Float64frombits(value.StaleNaN) } percentileStr := strconv.FormatFloat(qt.Quantile(), 'f', -1, 64) - qtlabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nameStr, baseName, quantileStr, percentileStr) + qtlabels := createLabels(baseName, quantileStr, percentileStr) addSample(tsMap, quantile, qtlabels, metric.Type().String()) } // add _created time series if needed startTimestamp := pt.StartTimestamp() if settings.ExportCreatedMetric && startTimestamp != 0 { - createdLabels := createAttributes( - resource, - pt.Attributes(), - settings.ExternalLabels, - nameStr, - baseName+createdSuffix, - ) + createdLabels := createLabels(baseName + createdSuffix) addCreatedTimeSeriesIfNeeded(tsMap, createdLabels, startTimestamp, metric.Type().String()) } } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go 
index 9a4ec6e11a..3c7494a6bf 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go @@ -60,15 +60,20 @@ func addSingleExponentialHistogramDataPoint( // to Prometheus Native Histogram. func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, error) { scale := p.Scale() - if scale < -4 || scale > 8 { + if scale < -4 { return prompb.Histogram{}, fmt.Errorf("cannot convert exponential to native histogram."+ - " Scale must be <= 8 and >= -4, was %d", scale) - // TODO: downscale to 8 if scale > 8 + " Scale must be >= -4, was %d", scale) } - pSpans, pDeltas := convertBucketsLayout(p.Positive()) - nSpans, nDeltas := convertBucketsLayout(p.Negative()) + var scaleDown int32 + if scale > 8 { + scaleDown = scale - 8 + scale = 8 + } + + pSpans, pDeltas := convertBucketsLayout(p.Positive(), scaleDown) + nSpans, nDeltas := convertBucketsLayout(p.Negative(), scaleDown) h := prompb.Histogram{ Schema: scale, @@ -106,17 +111,19 @@ func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prom // The bucket indexes conversion was adjusted, since OTel exp. histogram bucket // index 0 corresponds to the range (1, base] while Prometheus bucket index 0 // to the range (base 1]. -func convertBucketsLayout(buckets pmetric.ExponentialHistogramDataPointBuckets) ([]prompb.BucketSpan, []int64) { +// +// scaleDown is the factor by which the buckets are scaled down. In other words 2^scaleDown buckets will be merged into one. 
+func convertBucketsLayout(buckets pmetric.ExponentialHistogramDataPointBuckets, scaleDown int32) ([]prompb.BucketSpan, []int64) { bucketCounts := buckets.BucketCounts() if bucketCounts.Len() == 0 { return nil, nil } var ( - spans []prompb.BucketSpan - deltas []int64 - prevCount int64 - nextBucketIdx int32 + spans []prompb.BucketSpan + deltas []int64 + count int64 + prevCount int64 ) appendDelta := func(count int64) { @@ -125,34 +132,67 @@ func convertBucketsLayout(buckets pmetric.ExponentialHistogramDataPointBuckets) prevCount = count } - for i := 0; i < bucketCounts.Len(); i++ { - count := int64(bucketCounts.At(i)) + // Let the compiler figure out that this is const during this function by + // moving it into a local variable. + numBuckets := bucketCounts.Len() + + // The offset is scaled and adjusted by 1 as described above. + bucketIdx := buckets.Offset()>>scaleDown + 1 + spans = append(spans, prompb.BucketSpan{ + Offset: bucketIdx, + Length: 0, + }) + + for i := 0; i < numBuckets; i++ { + // The offset is scaled and adjusted by 1 as described above. + nextBucketIdx := (int32(i)+buckets.Offset())>>scaleDown + 1 + if bucketIdx == nextBucketIdx { // We have not collected enough buckets to merge yet. + count += int64(bucketCounts.At(i)) + continue + } if count == 0 { + count = int64(bucketCounts.At(i)) continue } - // The offset is adjusted by 1 as described above. - bucketIdx := int32(i) + buckets.Offset() + 1 - delta := bucketIdx - nextBucketIdx - if i == 0 || delta > 2 { - // We have to create a new span, either because we are - // at the very beginning, or because we have found a gap + gap := nextBucketIdx - bucketIdx - 1 + if gap > 2 { + // We have to create a new span, because we have found a gap // of more than two buckets. 
The constant 2 is copied from the logic in // https://github.com/prometheus/client_golang/blob/27f0506d6ebbb117b6b697d0552ee5be2502c5f2/prometheus/histogram.go#L1296 spans = append(spans, prompb.BucketSpan{ - Offset: delta, + Offset: gap, Length: 0, }) } else { // We have found a small gap (or no gap at all). // Insert empty buckets as needed. - for j := int32(0); j < delta; j++ { + for j := int32(0); j < gap; j++ { appendDelta(0) } } appendDelta(count) - nextBucketIdx = bucketIdx + 1 + count = int64(bucketCounts.At(i)) + bucketIdx = nextBucketIdx } + // Need to use the last item's index. The offset is scaled and adjusted by 1 as described above. + gap := (int32(numBuckets)+buckets.Offset()-1)>>scaleDown + 1 - bucketIdx + if gap > 2 { + // We have to create a new span, because we have found a gap + // of more than two buckets. The constant 2 is copied from the logic in + // https://github.com/prometheus/client_golang/blob/27f0506d6ebbb117b6b697d0552ee5be2502c5f2/prometheus/histogram.go#L1296 + spans = append(spans, prompb.BucketSpan{ + Offset: gap, + Length: 0, + }) + } else { + // We have found a small gap (or no gap at all). + // Insert empty buckets as needed. + for j := int32(0); j < gap; j++ { + appendDelta(0) + } + } + appendDelta(count) return spans, deltas } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index 34ee762dd4..6a5a656048 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -22,6 +22,7 @@ type Settings struct { ExternalLabels map[string]string DisableTargetInfo bool ExportCreatedMetric bool + AddMetricSuffixes bool } // FromMetrics converts pmetric.Metrics to prometheus remote write format. 
@@ -51,6 +52,7 @@ func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*promp } // handle individual metric based on type + //exhaustive:enforce switch metric.Type() { case pmetric.MetricTypeGauge: dataPoints := metric.Gauge().DataPoints() @@ -81,7 +83,7 @@ func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*promp if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) } - name := prometheustranslator.BuildPromCompliantName(metric, settings.Namespace) + name := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes) for x := 0; x < dataPoints.Len(); x++ { errs = multierr.Append( errs, diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go index 3a5d201ddd..c8e59694b8 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go @@ -27,7 +27,7 @@ func addSingleGaugeNumberDataPoint( settings Settings, series map[string]*prompb.TimeSeries, ) { - name := prometheustranslator.BuildPromCompliantName(metric, settings.Namespace) + name := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes) labels := createAttributes( resource, pt.Attributes(), @@ -60,7 +60,7 @@ func addSingleSumNumberDataPoint( settings Settings, series map[string]*prompb.TimeSeries, ) { - name := prometheustranslator.BuildPromCompliantName(metric, settings.Namespace) + name := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes) labels := createAttributes( resource, pt.Attributes(), diff --git a/storage/remote/otlptranslator/update-copy.sh b/storage/remote/otlptranslator/update-copy.sh index 13a2a7a2e6..36ad0cc35c 100755 --- 
a/storage/remote/otlptranslator/update-copy.sh +++ b/storage/remote/otlptranslator/update-copy.sh @@ -1,6 +1,6 @@ #!/bin/bash -OTEL_VERSION=v0.81.0 +OTEL_VERSION=v0.88.0 git clone https://github.com/open-telemetry/opentelemetry-collector-contrib ./tmp cd ./tmp @@ -8,7 +8,8 @@ git checkout $OTEL_VERSION cd .. rm -rf ./prometheusremotewrite/* cp -r ./tmp/pkg/translator/prometheusremotewrite/*.go ./prometheusremotewrite -rm -rf ./prometheusremotewrite/*_test.go +cp -r ./tmp/pkg/translator/prometheus/*.go ./prometheus +rm -rf ./prometheus/*_test.go rm -rf ./tmp sed -i '' 's#github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus#github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus#g' ./prometheusremotewrite/*.go From 4710679fbe8fd8e6e40c85a59ee0d86befb39c57 Mon Sep 17 00:00:00 2001 From: Goutham Date: Wed, 15 Nov 2023 15:30:09 +0100 Subject: [PATCH 54/66] Skip golanglint-ci on copied folders Signed-off-by: Goutham --- .golangci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.golangci.yml b/.golangci.yml index 666d22cbe4..4df572c198 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -6,6 +6,7 @@ run: skip-dirs: # Copied it from a different source - storage/remote/otlptranslator/prometheusremotewrite + - storage/remote/otlptranslator/prometheus output: sort-results: true From 3048a88ae76c5d42d88726a4ba5ffe95964541e6 Mon Sep 17 00:00:00 2001 From: Goutham Date: Wed, 15 Nov 2023 15:52:18 +0100 Subject: [PATCH 55/66] Add suffixes Older version already did that. 
This upgrade needed manual opt-in Signed-off-by: Goutham --- storage/remote/write_handler.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index a0dd3940e2..9891c6aae7 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -207,7 +207,9 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } - prwMetricsMap, errs := otlptranslator.FromMetrics(req.Metrics(), otlptranslator.Settings{}) + prwMetricsMap, errs := otlptranslator.FromMetrics(req.Metrics(), otlptranslator.Settings{ + AddMetricSuffixes: true, + }) if errs != nil { level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", errs) } From b53254a81881d5439429a0261d0827dbf9cc1d9d Mon Sep 17 00:00:00 2001 From: Kemal Akkoyun Date: Wed, 15 Nov 2023 18:10:39 +0100 Subject: [PATCH 56/66] Upgrade golang.org/x packages Signed-off-by: Kemal Akkoyun --- go.mod | 18 +++++++++--------- go.sum | 31 ++++++++++++++++++------------- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/go.mod b/go.mod index 44cf7c4b2e..caca71f81d 100644 --- a/go.mod +++ b/go.mod @@ -68,13 +68,13 @@ require ( go.uber.org/automaxprocs v1.5.3 go.uber.org/goleak v1.2.1 go.uber.org/multierr v1.11.0 - golang.org/x/exp v0.0.0-20231006140011-7918f672742d - golang.org/x/net v0.17.0 + golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa + golang.org/x/net v0.18.0 golang.org/x/oauth2 v0.13.0 - golang.org/x/sync v0.4.0 - golang.org/x/sys v0.13.0 + golang.org/x/sync v0.5.0 + golang.org/x/sys v0.14.0 golang.org/x/time v0.3.0 - golang.org/x/tools v0.14.0 + golang.org/x/tools v0.15.0 google.golang.org/api v0.147.0 google.golang.org/genproto/googleapis/api v0.0.0-20231012201019-e917dd12ba7a google.golang.org/grpc v1.59.0 @@ -178,10 +178,10 @@ require ( go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/otel/metric v1.19.0 // indirect 
go.opentelemetry.io/proto/otlp v1.0.0 // indirect - golang.org/x/crypto v0.14.0 // indirect - golang.org/x/mod v0.13.0 // indirect - golang.org/x/term v0.13.0 // indirect - golang.org/x/text v0.13.0 // indirect + golang.org/x/crypto v0.15.0 // indirect + golang.org/x/mod v0.14.0 // indirect + golang.org/x/term v0.14.0 // indirect + golang.org/x/text v0.14.0 // indirect google.golang.org/appengine v1.6.7 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect diff --git a/go.sum b/go.sum index c4658fbdbe..e6436825d4 100644 --- a/go.sum +++ b/go.sum @@ -811,8 +811,9 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= +golang.org/x/crypto v0.15.0 h1:frVn1TEaCEaZcn3Tmd7Y2b5KKPaZ+I32Q2OA3kYp5TA= +golang.org/x/crypto v0.15.0/go.mod h1:4ChreQoLWfG3xLDer1WdlH5NdlQ3+mwnQq1YTKY+72g= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -823,8 +824,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp 
v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= +golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ= +golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -847,8 +848,8 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= -golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0= +golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -894,8 +895,9 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.1.0/go.mod 
h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= +golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg= +golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -918,8 +920,8 @@ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= -golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= +golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -988,15 +990,17 @@ golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys 
v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= +golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= -golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= +golang.org/x/term v0.14.0 h1:LGK9IlZ8T9jvdy6cTdfKUCltatMFOehAQo9SRC46UQ8= +golang.org/x/term v0.14.0/go.mod h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1009,8 +1013,9 @@ golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.13.0 
h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1071,8 +1076,8 @@ golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= -golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= +golang.org/x/tools v0.15.0 h1:zdAyfUGbYmuVokhzVmghFl2ZJh5QhcfebBgmVPFYA+8= +golang.org/x/tools v0.15.0/go.mod h1:hpksKq4dtpQWS1uQ61JkdqWM3LscIS6Slf+VVkm+wQk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From 32ee1b15de6220ab975f3dac7eb82131a0b1e95f Mon Sep 17 00:00:00 2001 From: zenador Date: Thu, 16 Nov 2023 22:07:37 +0800 Subject: [PATCH 57/66] Fix error on ingesting out-of-order exemplars (#13021) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix and improve ingesting exemplars for native histograms. 
See code comment for a detailed explanation of the algorithm. Note that this changes the current behavior for all kinds of samples slightly: We now allow exemplars with the same timestamp as during the last scrape if the value or the labels have changed. Also note that we now do not ingest exemplars without timestamps for native histograms anymore. Signed-off-by: Jeanette Tan Signed-off-by: György Krajcsovits Co-authored-by: Björn Rabenstein --------- Signed-off-by: Jeanette Tan Signed-off-by: György Krajcsovits Signed-off-by: zenador Co-authored-by: György Krajcsovits Co-authored-by: Björn Rabenstein --- model/textparse/protobufparse.go | 14 ++++-- model/textparse/protobufparse_test.go | 8 ---- scrape/scrape.go | 62 +++++++++++++++++++-------- scrape/scrape_test.go | 47 +++++++++++++++++--- tsdb/exemplar.go | 19 +++++++- 5 files changed, 113 insertions(+), 37 deletions(-) diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index 9a6dd6f6dc..23afb5c596 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -317,22 +317,28 @@ func (p *ProtobufParser) Exemplar(ex *exemplar.Exemplar) bool { exProto = m.GetCounter().GetExemplar() case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM: bb := m.GetHistogram().GetBucket() + isClassic := p.state == EntrySeries if p.fieldPos < 0 { - if p.state == EntrySeries { + if isClassic { return false // At _count or _sum. } p.fieldPos = 0 // Start at 1st bucket for native histograms. } for p.fieldPos < len(bb) { exProto = bb[p.fieldPos].GetExemplar() - if p.state == EntrySeries { + if isClassic { break } p.fieldPos++ - if exProto != nil { - break + // We deliberately drop exemplars with no timestamp only for native histograms. + if exProto != nil && (isClassic || exProto.GetTimestamp() != nil) { + break // Found a classic histogram exemplar or a native histogram exemplar with a timestamp. } } + // If the last exemplar for native histograms has no timestamp, ignore it. 
+ if !isClassic && exProto.GetTimestamp() == nil { + return false + } default: return false } diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index 10ec5f4405..d83f2088a1 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -729,7 +729,6 @@ func TestProtobufParse(t *testing.T) { ), e: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, - {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, { @@ -766,7 +765,6 @@ func TestProtobufParse(t *testing.T) { ), e: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, - {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, { @@ -802,7 +800,6 @@ func TestProtobufParse(t *testing.T) { ), e: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, - {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, { @@ -839,7 +836,6 @@ func TestProtobufParse(t *testing.T) { ), e: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, - {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, { @@ -1233,7 +1229,6 @@ func TestProtobufParse(t *testing.T) { ), e: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, - {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, { // 12 @@ -1328,7 +1323,6 @@ func TestProtobufParse(t *testing.T) { ), e: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, - {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, { // 21 @@ -1422,7 +1416,6 @@ 
func TestProtobufParse(t *testing.T) { ), e: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, - {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, { // 30 @@ -1517,7 +1510,6 @@ func TestProtobufParse(t *testing.T) { ), e: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, - {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, { // 39 diff --git a/scrape/scrape.go b/scrape/scrape.go index 790ee18af1..1bcc333d82 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -24,6 +24,7 @@ import ( "math" "net/http" "reflect" + "sort" "strconv" "strings" "sync" @@ -1404,6 +1405,8 @@ func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, metadataChanged bool ) + exemplars := make([]exemplar.Exemplar, 1) + // updateMetadata updates the current iteration's metadata object and the // metadataChanged value if we have metadata in the scrape cache AND the // labelset is for a new series or the metadata for this series has just @@ -1569,18 +1572,55 @@ loop: // Increment added even if there's an error so we correctly report the // number of samples remaining after relabeling. added++ - + exemplars = exemplars[:0] // Reset and reuse the exemplar slice. for hasExemplar := p.Exemplar(&e); hasExemplar; hasExemplar = p.Exemplar(&e) { if !e.HasTs { + if isHistogram { + // We drop exemplars for native histograms if they don't have a timestamp. + // Missing timestamps are deliberately not supported as we want to start + // enforcing timestamps for exemplars as otherwise proper deduplication + // is inefficient and purely based on heuristics: we cannot distinguish + // between repeated exemplars and new instances with the same values. + // This is done silently without logs as it is not an error but out of spec. 
+ // This does not affect classic histograms so that behaviour is unchanged. + e = exemplar.Exemplar{} // Reset for next time round loop. + continue + } e.Ts = t } + exemplars = append(exemplars, e) + e = exemplar.Exemplar{} // Reset for next time round loop. + } + sort.Slice(exemplars, func(i, j int) bool { + // Sort first by timestamp, then value, then labels so the checking + // for duplicates / out of order is more efficient during validation. + if exemplars[i].Ts != exemplars[j].Ts { + return exemplars[i].Ts < exemplars[j].Ts + } + if exemplars[i].Value != exemplars[j].Value { + return exemplars[i].Value < exemplars[j].Value + } + return exemplars[i].Labels.Hash() < exemplars[j].Labels.Hash() + }) + outOfOrderExemplars := 0 + for _, e := range exemplars { _, exemplarErr := app.AppendExemplar(ref, lset, e) - exemplarErr = sl.checkAddExemplarError(exemplarErr, e, &appErrs) - if exemplarErr != nil { + switch { + case exemplarErr == nil: + // Do nothing. + case errors.Is(exemplarErr, storage.ErrOutOfOrderExemplar): + outOfOrderExemplars++ + default: // Since exemplar storage is still experimental, we don't fail the scrape on ingestion errors. level.Debug(sl.l).Log("msg", "Error while adding exemplar in AddExemplar", "exemplar", fmt.Sprintf("%+v", e), "err", exemplarErr) } - e = exemplar.Exemplar{} // reset for next time round loop + } + if outOfOrderExemplars > 0 && outOfOrderExemplars == len(exemplars) { + // Only report out of order exemplars if all are out of order, otherwise this was a partial update + // to some existing set of exemplars. 
+ appErrs.numExemplarOutOfOrder += outOfOrderExemplars + level.Debug(sl.l).Log("msg", "Out of order exemplars", "count", outOfOrderExemplars, "latest", fmt.Sprintf("%+v", exemplars[len(exemplars)-1])) + sl.metrics.targetScrapeExemplarOutOfOrder.Add(float64(outOfOrderExemplars)) } if sl.appendMetadataToWAL && metadataChanged { @@ -1673,20 +1713,6 @@ func (sl *scrapeLoop) checkAddError(ce *cacheEntry, met []byte, tp *int64, err e } } -func (sl *scrapeLoop) checkAddExemplarError(err error, e exemplar.Exemplar, appErrs *appendErrors) error { - switch { - case errors.Is(err, storage.ErrNotFound): - return storage.ErrNotFound - case errors.Is(err, storage.ErrOutOfOrderExemplar): - appErrs.numExemplarOutOfOrder++ - level.Debug(sl.l).Log("msg", "Out of order exemplar", "exemplar", fmt.Sprintf("%+v", e)) - sl.metrics.targetScrapeExemplarOutOfOrder.Inc() - return nil - default: - return err - } -} - // The constants are suffixed with the invalid \xff unicode rune to avoid collisions // with scraped metrics in the cache. var ( diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index a2e0d00c6c..522d2e1f86 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -2155,7 +2155,7 @@ metric_total{n="2"} 2 # {t="2"} 2.0 20000 }, }, { - title: "Native histogram with two exemplars", + title: "Native histogram with three exemplars", scrapeText: `name: "test_histogram" help: "Test histogram with many buckets removed to keep it manageable in size." 
type: HISTOGRAM @@ -2193,6 +2193,21 @@ metric: < value: -0.00029 > > + bucket: < + cumulative_count: 32 + upper_bound: -0.0001899999999999998 + exemplar: < + label: < + name: "dummyID" + value: "58215" + > + value: -0.00019 + timestamp: < + seconds: 1625851055 + nanos: 146848599 + > + > + > schema: 3 zero_threshold: 2.938735877055719e-39 zero_count: 2 @@ -2248,12 +2263,13 @@ metric: < }, }}, exemplars: []exemplar.Exemplar{ + // Native histogram exemplars are arranged by timestamp, and those with missing timestamps are dropped. + {Labels: labels.FromStrings("dummyID", "58215"), Value: -0.00019, Ts: 1625851055146, HasTs: true}, {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}, - {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, Ts: 1234568, HasTs: false}, }, }, { - title: "Native histogram with two exemplars scraped as classic histogram", + title: "Native histogram with three exemplars scraped as classic histogram", scrapeText: `name: "test_histogram" help: "Test histogram with many buckets removed to keep it manageable in size." 
type: HISTOGRAM @@ -2291,6 +2307,21 @@ metric: < value: -0.00029 > > + bucket: < + cumulative_count: 32 + upper_bound: -0.0001899999999999998 + exemplar: < + label: < + name: "dummyID" + value: "58215" + > + value: -0.00019 + timestamp: < + seconds: 1625851055 + nanos: 146848599 + > + > + > schema: 3 zero_threshold: 2.938735877055719e-39 zero_count: 2 @@ -2332,6 +2363,7 @@ metric: < {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), t: 1234568, f: 2}, {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), t: 1234568, f: 4}, {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), t: 1234568, f: 16}, + {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0001899999999999998"), t: 1234568, f: 32}, {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), t: 1234568, f: 175}, }, histograms: []histogramSample{{ @@ -2355,10 +2387,15 @@ metric: < }, }}, exemplars: []exemplar.Exemplar{ + // Native histogram one is arranged by timestamp. + // Exemplars with missing timestamps are dropped for native histograms. + {Labels: labels.FromStrings("dummyID", "58215"), Value: -0.00019, Ts: 1625851055146, HasTs: true}, + {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}, + // Classic histogram one is in order of appearance. + // Exemplars with missing timestamps are supported for classic histograms. 
{Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}, {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, Ts: 1234568, HasTs: false}, - {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, Ts: 1625851155146, HasTs: true}, - {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, Ts: 1234568, HasTs: false}, + {Labels: labels.FromStrings("dummyID", "58215"), Value: -0.00019, Ts: 1625851055146, HasTs: true}, }, }, } diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index 904fc7c2bd..8eaf42653c 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -245,11 +245,26 @@ func (ce *CircularExemplarStorage) validateExemplar(key []byte, e exemplar.Exemp // Check for duplicate vs last stored exemplar for this series. // NB these are expected, and appending them is a no-op. - if ce.exemplars[idx.newest].exemplar.Equals(e) { + // For floats and classic histograms, there is only 1 exemplar per series, + // so this is sufficient. For native histograms with multiple exemplars per series, + // we have another check below. + newestExemplar := ce.exemplars[idx.newest].exemplar + if newestExemplar.Equals(e) { return storage.ErrDuplicateExemplar } - if e.Ts <= ce.exemplars[idx.newest].exemplar.Ts { + // Since during the scrape the exemplars are sorted first by timestamp, then value, then labels, + // if any of these conditions are true, we know that the exemplar is either a duplicate + // of a previous one (but not the most recent one as that is checked above) or out of order. + // We now allow exemplars with duplicate timestamps as long as they have different values and/or labels + // since that can happen for different buckets of a native histogram. 
+ // We do not distinguish between duplicates and out of order as iterating through the exemplars + // to check for that would be expensive (versus just comparing with the most recent one) especially + // since this is run under a lock, and not worth it as we just need to return an error so we do not + // append the exemplar. + if e.Ts < newestExemplar.Ts || + (e.Ts == newestExemplar.Ts && e.Value < newestExemplar.Value) || + (e.Ts == newestExemplar.Ts && e.Value == newestExemplar.Value && e.Labels.Hash() < newestExemplar.Labels.Hash()) { if appended { ce.metrics.outOfOrderExemplars.Inc() } From f997c72f294c0f18ca13fa06d51889af04135195 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Fri, 17 Nov 2023 12:29:36 +0100 Subject: [PATCH 58/66] Make head block ULIDs descriptive (#13100) * Make head block ULIDs descriptive As far as I understand, these ULIDs aren't persisted anywhere, so it should be safe to change them. When debugging an issue, seeing an ULID like `2ZBXFNYVVFDXFPGSB1CHFNYQTZ` or `33DXR7JA39CHDKMQ9C40H6YVVF` isn't very helpful, so I propose to make them readable in their ULID string version. 
Signed-off-by: Oleg Zaytsev * Set a different ULID for RangeHead Signed-off-by: Oleg Zaytsev --------- Signed-off-by: Oleg Zaytsev --- tsdb/head.go | 10 ++++++---- tsdb/ooo_head.go | 8 +++++--- tsdb/ooo_head_read.go | 7 ++++--- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/tsdb/head.go b/tsdb/head.go index f7e697e54a..d3b2b09cce 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -1407,11 +1407,13 @@ func (h *RangeHead) NumSeries() uint64 { return h.head.NumSeries() } +var rangeHeadULID = ulid.MustParse("0000000000XXXXXXXRANGEHEAD") + func (h *RangeHead) Meta() BlockMeta { return BlockMeta{ MinTime: h.MinTime(), MaxTime: h.MaxTime(), - ULID: h.head.Meta().ULID, + ULID: rangeHeadULID, Stats: BlockStats{ NumSeries: h.NumSeries(), }, @@ -1537,15 +1539,15 @@ func (h *Head) NumSeries() uint64 { return h.numSeries.Load() } +var headULID = ulid.MustParse("0000000000XXXXXXXXXXXXHEAD") + // Meta returns meta information about the head. // The head is dynamic so will return dynamic results. 
func (h *Head) Meta() BlockMeta { - var id [16]byte - copy(id[:], "______head______") return BlockMeta{ MinTime: h.MinTime(), MaxTime: h.MaxTime(), - ULID: ulid.ULID(id), + ULID: headULID, Stats: BlockStats{ NumSeries: h.NumSeries(), }, diff --git a/tsdb/ooo_head.go b/tsdb/ooo_head.go index 45827889e6..1251af4a97 100644 --- a/tsdb/ooo_head.go +++ b/tsdb/ooo_head.go @@ -17,6 +17,8 @@ import ( "fmt" "sort" + "github.com/oklog/ulid" + "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/tombstones" ) @@ -135,13 +137,13 @@ func (oh *OOORangeHead) Tombstones() (tombstones.Reader, error) { return tombstones.NewMemTombstones(), nil } +var oooRangeHeadULID = ulid.MustParse("0000000000XXXX000RANGEHEAD") + func (oh *OOORangeHead) Meta() BlockMeta { - var id [16]byte - copy(id[:], "____ooo_head____") return BlockMeta{ MinTime: oh.mint, MaxTime: oh.maxt, - ULID: id, + ULID: oooRangeHeadULID, Stats: BlockStats{ NumSeries: oh.head.NumSeries(), }, diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go index a7a1e9da2c..b9c2dc4a50 100644 --- a/tsdb/ooo_head_read.go +++ b/tsdb/ooo_head_read.go @@ -18,6 +18,7 @@ import ( "errors" "math" + "github.com/oklog/ulid" "golang.org/x/exp/slices" "github.com/prometheus/prometheus/model/labels" @@ -371,13 +372,13 @@ func (ch *OOOCompactionHead) Tombstones() (tombstones.Reader, error) { return tombstones.NewMemTombstones(), nil } +var oooCompactionHeadULID = ulid.MustParse("0000000000XX000COMPACTHEAD") + func (ch *OOOCompactionHead) Meta() BlockMeta { - var id [16]byte - copy(id[:], "ooo_compact_head") return BlockMeta{ MinTime: ch.mint, MaxTime: ch.maxt, - ULID: id, + ULID: oooCompactionHeadULID, Stats: BlockStats{ NumSeries: uint64(len(ch.postings)), }, From a3e02f35d63c1d92ac607d33da7a9661f53ae1cb Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 17 Nov 2023 18:19:40 +0000 Subject: [PATCH 59/66] labels: extract common code between slice and stringlabels This reduces bulk and 
should avoid issues if a fix is made in one file and not the other. A few methods now call `Range()` instead of `range`, but nothing performance-sensitive. Signed-off-by: Bryan Boreham --- model/labels/labels.go | 214 ------------------------- model/labels/labels_common.go | 235 ++++++++++++++++++++++++++++ model/labels/labels_stringlabels.go | 226 -------------------------- 3 files changed, 235 insertions(+), 440 deletions(-) create mode 100644 model/labels/labels_common.go diff --git a/model/labels/labels.go b/model/labels/labels.go index 231460ea33..bf67224bba 100644 --- a/model/labels/labels.go +++ b/model/labels/labels.go @@ -17,32 +17,12 @@ package labels import ( "bytes" - "encoding/json" - "strconv" "strings" "github.com/cespare/xxhash/v2" - "github.com/prometheus/common/model" "golang.org/x/exp/slices" ) -// Well-known label names used by Prometheus components. -const ( - MetricName = "__name__" - AlertName = "alertname" - BucketLabel = "le" - InstanceName = "instance" - - labelSep = '\xfe' -) - -var seps = []byte{'\xff'} - -// Label is a key/value pair of strings. -type Label struct { - Name, Value string -} - // Labels is a sorted set of labels. Order has to be guaranteed upon // instantiation. type Labels []Label @@ -51,23 +31,6 @@ func (ls Labels) Len() int { return len(ls) } func (ls Labels) Swap(i, j int) { ls[i], ls[j] = ls[j], ls[i] } func (ls Labels) Less(i, j int) bool { return ls[i].Name < ls[j].Name } -func (ls Labels) String() string { - var b bytes.Buffer - - b.WriteByte('{') - for i, l := range ls { - if i > 0 { - b.WriteByte(',') - b.WriteByte(' ') - } - b.WriteString(l.Name) - b.WriteByte('=') - b.WriteString(strconv.Quote(l.Value)) - } - b.WriteByte('}') - return b.String() -} - // Bytes returns ls as a byte slice. // It uses an byte invalid character as a separator and so should not be used for printing. 
func (ls Labels) Bytes(buf []byte) []byte { @@ -84,40 +47,6 @@ func (ls Labels) Bytes(buf []byte) []byte { return b.Bytes() } -// MarshalJSON implements json.Marshaler. -func (ls Labels) MarshalJSON() ([]byte, error) { - return json.Marshal(ls.Map()) -} - -// UnmarshalJSON implements json.Unmarshaler. -func (ls *Labels) UnmarshalJSON(b []byte) error { - var m map[string]string - - if err := json.Unmarshal(b, &m); err != nil { - return err - } - - *ls = FromMap(m) - return nil -} - -// MarshalYAML implements yaml.Marshaler. -func (ls Labels) MarshalYAML() (interface{}, error) { - return ls.Map(), nil -} - -// UnmarshalYAML implements yaml.Unmarshaler. -func (ls *Labels) UnmarshalYAML(unmarshal func(interface{}) error) error { - var m map[string]string - - if err := unmarshal(&m); err != nil { - return err - } - - *ls = FromMap(m) - return nil -} - // MatchLabels returns a subset of Labels that matches/does not match with the provided label names based on the 'on' boolean. // If on is set to true, it returns the subset of labels that match with the provided label names and its inverse when 'on' is set to false. func (ls Labels) MatchLabels(on bool, names ...string) Labels { @@ -318,19 +247,6 @@ func (ls Labels) WithoutEmpty() Labels { return ls } -// IsValid checks if the metric name or label names are valid. -func (ls Labels) IsValid() bool { - for _, l := range ls { - if l.Name == model.MetricNameLabel && !model.IsValidMetricName(model.LabelValue(l.Value)) { - return false - } - if !model.LabelName(l.Name).IsValid() || !model.LabelValue(l.Value).IsValid() { - return false - } - } - return true -} - // Equal returns whether the two label sets are equal. func Equal(ls, o Labels) bool { if len(ls) != len(o) { @@ -344,15 +260,6 @@ func Equal(ls, o Labels) bool { return true } -// Map returns a string map of the labels. 
-func (ls Labels) Map() map[string]string { - m := make(map[string]string, len(ls)) - for _, l := range ls { - m[l.Name] = l.Value - } - return m -} - // EmptyLabels returns n empty Labels value, for convenience. func EmptyLabels() Labels { return Labels{} @@ -368,15 +275,6 @@ func New(ls ...Label) Labels { return set } -// FromMap returns new sorted Labels from the given map. -func FromMap(m map[string]string) Labels { - l := make([]Label, 0, len(m)) - for k, v := range m { - l = append(l, Label{Name: k, Value: v}) - } - return New(l...) -} - // FromStrings creates new labels from pairs of strings. func FromStrings(ss ...string) Labels { if len(ss)%2 != 0 { @@ -460,118 +358,6 @@ func (ls Labels) ReleaseStrings(release func(string)) { } } -// Builder allows modifying Labels. -type Builder struct { - base Labels - del []string - add []Label -} - -// NewBuilder returns a new LabelsBuilder. -func NewBuilder(base Labels) *Builder { - b := &Builder{ - del: make([]string, 0, 5), - add: make([]Label, 0, 5), - } - b.Reset(base) - return b -} - -// Reset clears all current state for the builder. -func (b *Builder) Reset(base Labels) { - b.base = base - b.del = b.del[:0] - b.add = b.add[:0] - for _, l := range b.base { - if l.Value == "" { - b.del = append(b.del, l.Name) - } - } -} - -// Del deletes the label of the given name. -func (b *Builder) Del(ns ...string) *Builder { - for _, n := range ns { - for i, a := range b.add { - if a.Name == n { - b.add = append(b.add[:i], b.add[i+1:]...) - } - } - b.del = append(b.del, n) - } - return b -} - -// Keep removes all labels from the base except those with the given names. -func (b *Builder) Keep(ns ...string) *Builder { -Outer: - for _, l := range b.base { - for _, n := range ns { - if l.Name == n { - continue Outer - } - } - b.del = append(b.del, l.Name) - } - return b -} - -// Set the name/value pair as a label. A value of "" means delete that label. 
-func (b *Builder) Set(n, v string) *Builder { - if v == "" { - // Empty labels are the same as missing labels. - return b.Del(n) - } - for i, a := range b.add { - if a.Name == n { - b.add[i].Value = v - return b - } - } - b.add = append(b.add, Label{Name: n, Value: v}) - - return b -} - -func (b *Builder) Get(n string) string { - // Del() removes entries from .add but Set() does not remove from .del, so check .add first. - for _, a := range b.add { - if a.Name == n { - return a.Value - } - } - if slices.Contains(b.del, n) { - return "" - } - return b.base.Get(n) -} - -// Range calls f on each label in the Builder. -func (b *Builder) Range(f func(l Label)) { - // Stack-based arrays to avoid heap allocation in most cases. - var addStack [128]Label - var delStack [128]string - // Take a copy of add and del, so they are unaffected by calls to Set() or Del(). - origAdd, origDel := append(addStack[:0], b.add...), append(delStack[:0], b.del...) - b.base.Range(func(l Label) { - if !slices.Contains(origDel, l.Name) && !contains(origAdd, l.Name) { - f(l) - } - }) - for _, a := range origAdd { - f(a) - } -} - -func contains(s []Label, n string) bool { - for _, a := range s { - if a.Name == n { - return true - } - } - return false -} - // Labels returns the labels from the builder. // If no modifications were made, the original labels are returned. func (b *Builder) Labels() Labels { diff --git a/model/labels/labels_common.go b/model/labels/labels_common.go new file mode 100644 index 0000000000..2a722b84cc --- /dev/null +++ b/model/labels/labels_common.go @@ -0,0 +1,235 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package labels + +import ( + "bytes" + "encoding/json" + "strconv" + + "github.com/prometheus/common/model" + "golang.org/x/exp/slices" +) + +const ( + MetricName = "__name__" + AlertName = "alertname" + BucketLabel = "le" + InstanceName = "instance" + + labelSep = '\xfe' +) + +var seps = []byte{'\xff'} + +// Label is a key/value pair of strings. +type Label struct { + Name, Value string +} + +func (ls Labels) String() string { + var b bytes.Buffer + + b.WriteByte('{') + i := 0 + ls.Range(func(l Label) { + if i > 0 { + b.WriteByte(',') + b.WriteByte(' ') + } + b.WriteString(l.Name) + b.WriteByte('=') + b.WriteString(strconv.Quote(l.Value)) + i++ + }) + b.WriteByte('}') + return b.String() +} + +// MarshalJSON implements json.Marshaler. +func (ls Labels) MarshalJSON() ([]byte, error) { + return json.Marshal(ls.Map()) +} + +// UnmarshalJSON implements json.Unmarshaler. +func (ls *Labels) UnmarshalJSON(b []byte) error { + var m map[string]string + + if err := json.Unmarshal(b, &m); err != nil { + return err + } + + *ls = FromMap(m) + return nil +} + +// MarshalYAML implements yaml.Marshaler. +func (ls Labels) MarshalYAML() (interface{}, error) { + return ls.Map(), nil +} + +// UnmarshalYAML implements yaml.Unmarshaler. +func (ls *Labels) UnmarshalYAML(unmarshal func(interface{}) error) error { + var m map[string]string + + if err := unmarshal(&m); err != nil { + return err + } + + *ls = FromMap(m) + return nil +} + +// IsValid checks if the metric name or label names are valid. 
+func (ls Labels) IsValid() bool { + err := ls.Validate(func(l Label) error { + if l.Name == model.MetricNameLabel && !model.IsValidMetricName(model.LabelValue(l.Value)) { + return strconv.ErrSyntax + } + if !model.LabelName(l.Name).IsValid() || !model.LabelValue(l.Value).IsValid() { + return strconv.ErrSyntax + } + return nil + }) + return err == nil +} + +// Map returns a string map of the labels. +func (ls Labels) Map() map[string]string { + m := make(map[string]string) + ls.Range(func(l Label) { + m[l.Name] = l.Value + }) + return m +} + +// FromMap returns new sorted Labels from the given map. +func FromMap(m map[string]string) Labels { + l := make([]Label, 0, len(m)) + for k, v := range m { + l = append(l, Label{Name: k, Value: v}) + } + return New(l...) +} + +// Builder allows modifying Labels. +type Builder struct { + base Labels + del []string + add []Label +} + +// NewBuilder returns a new LabelsBuilder. +func NewBuilder(base Labels) *Builder { + b := &Builder{ + del: make([]string, 0, 5), + add: make([]Label, 0, 5), + } + b.Reset(base) + return b +} + +// Reset clears all current state for the builder. +func (b *Builder) Reset(base Labels) { + b.base = base + b.del = b.del[:0] + b.add = b.add[:0] + b.base.Range(func(l Label) { + if l.Value == "" { + b.del = append(b.del, l.Name) + } + }) +} + +// Del deletes the label of the given name. +func (b *Builder) Del(ns ...string) *Builder { + for _, n := range ns { + for i, a := range b.add { + if a.Name == n { + b.add = append(b.add[:i], b.add[i+1:]...) + } + } + b.del = append(b.del, n) + } + return b +} + +// Keep removes all labels from the base except those with the given names. +func (b *Builder) Keep(ns ...string) *Builder { + b.base.Range(func(l Label) { + for _, n := range ns { + if l.Name == n { + return + } + } + b.del = append(b.del, l.Name) + }) + return b +} + +// Set the name/value pair as a label. A value of "" means delete that label. 
+func (b *Builder) Set(n, v string) *Builder { + if v == "" { + // Empty labels are the same as missing labels. + return b.Del(n) + } + for i, a := range b.add { + if a.Name == n { + b.add[i].Value = v + return b + } + } + b.add = append(b.add, Label{Name: n, Value: v}) + + return b +} + +func (b *Builder) Get(n string) string { + // Del() removes entries from .add but Set() does not remove from .del, so check .add first. + for _, a := range b.add { + if a.Name == n { + return a.Value + } + } + if slices.Contains(b.del, n) { + return "" + } + return b.base.Get(n) +} + +// Range calls f on each label in the Builder. +func (b *Builder) Range(f func(l Label)) { + // Stack-based arrays to avoid heap allocation in most cases. + var addStack [128]Label + var delStack [128]string + // Take a copy of add and del, so they are unaffected by calls to Set() or Del(). + origAdd, origDel := append(addStack[:0], b.add...), append(delStack[:0], b.del...) + b.base.Range(func(l Label) { + if !slices.Contains(origDel, l.Name) && !contains(origAdd, l.Name) { + f(l) + } + }) + for _, a := range origAdd { + f(a) + } +} + +func contains(s []Label, n string) bool { + for _, a := range s { + if a.Name == n { + return true + } + } + return false +} diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index bbb4452d45..d79a836796 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -16,33 +16,14 @@ package labels import ( - "bytes" - "encoding/json" "reflect" - "strconv" "strings" "unsafe" "github.com/cespare/xxhash/v2" - "github.com/prometheus/common/model" "golang.org/x/exp/slices" ) -// Well-known label names used by Prometheus components. -const ( - MetricName = "__name__" - AlertName = "alertname" - BucketLabel = "le" - InstanceName = "instance" -) - -var seps = []byte{'\xff'} - -// Label is a key/value pair of strings. 
-type Label struct { - Name, Value string -} - // Labels is implemented by a single flat string holding name/value pairs. // Each name and value is preceded by its length in varint encoding. // Names are in order. @@ -77,26 +58,6 @@ func decodeString(data string, index int) (string, int) { return data[index : index+size], index + size } -func (ls Labels) String() string { - var b bytes.Buffer - - b.WriteByte('{') - for i := 0; i < len(ls.data); { - if i > 0 { - b.WriteByte(',') - b.WriteByte(' ') - } - var name, value string - name, i = decodeString(ls.data, i) - value, i = decodeString(ls.data, i) - b.WriteString(name) - b.WriteByte('=') - b.WriteString(strconv.Quote(value)) - } - b.WriteByte('}') - return b.String() -} - // Bytes returns ls as a byte slice. // It uses non-printing characters and so should not be used for printing. func (ls Labels) Bytes(buf []byte) []byte { @@ -109,45 +70,11 @@ func (ls Labels) Bytes(buf []byte) []byte { return buf } -// MarshalJSON implements json.Marshaler. -func (ls Labels) MarshalJSON() ([]byte, error) { - return json.Marshal(ls.Map()) -} - -// UnmarshalJSON implements json.Unmarshaler. -func (ls *Labels) UnmarshalJSON(b []byte) error { - var m map[string]string - - if err := json.Unmarshal(b, &m); err != nil { - return err - } - - *ls = FromMap(m) - return nil -} - -// MarshalYAML implements yaml.Marshaler. -func (ls Labels) MarshalYAML() (interface{}, error) { - return ls.Map(), nil -} - // IsZero implements yaml.IsZeroer - if we don't have this then 'omitempty' fields are always omitted. func (ls Labels) IsZero() bool { return len(ls.data) == 0 } -// UnmarshalYAML implements yaml.Unmarshaler. -func (ls *Labels) UnmarshalYAML(unmarshal func(interface{}) error) error { - var m map[string]string - - if err := unmarshal(&m); err != nil { - return err - } - - *ls = FromMap(m) - return nil -} - // MatchLabels returns a subset of Labels that matches/does not match with the provided label names based on the 'on' boolean. 
// If on is set to true, it returns the subset of labels that match with the provided label names and its inverse when 'on' is set to false. // TODO: This is only used in printing an error message @@ -364,37 +291,11 @@ func (ls Labels) WithoutEmpty() Labels { return ls } -// IsValid checks if the metric name or label names are valid. -func (ls Labels) IsValid() bool { - err := ls.Validate(func(l Label) error { - if l.Name == model.MetricNameLabel && !model.IsValidMetricName(model.LabelValue(l.Value)) { - return strconv.ErrSyntax - } - if !model.LabelName(l.Name).IsValid() || !model.LabelValue(l.Value).IsValid() { - return strconv.ErrSyntax - } - return nil - }) - return err == nil -} - // Equal returns whether the two label sets are equal. func Equal(ls, o Labels) bool { return ls.data == o.data } -// Map returns a string map of the labels. -func (ls Labels) Map() map[string]string { - m := make(map[string]string, len(ls.data)/10) - for i := 0; i < len(ls.data); { - var lName, lValue string - lName, i = decodeString(ls.data, i) - lValue, i = decodeString(ls.data, i) - m[lName] = lValue - } - return m -} - // EmptyLabels returns an empty Labels value, for convenience. func EmptyLabels() Labels { return Labels{} @@ -420,15 +321,6 @@ func New(ls ...Label) Labels { return Labels{data: yoloString(buf)} } -// FromMap returns new sorted Labels from the given map. -func FromMap(m map[string]string) Labels { - l := make([]Label, 0, len(m)) - for k, v := range m { - l = append(l, Label{Name: k, Value: v}) - } - return New(l...) -} - // FromStrings creates new labels from pairs of strings. func FromStrings(ss ...string) Labels { if len(ss)%2 != 0 { @@ -547,124 +439,6 @@ func (ls Labels) ReleaseStrings(release func(string)) { release(ls.data) } -// Builder allows modifying Labels. -type Builder struct { - base Labels - del []string - add []Label -} - -// NewBuilder returns a new LabelsBuilder. 
-func NewBuilder(base Labels) *Builder { - b := &Builder{ - del: make([]string, 0, 5), - add: make([]Label, 0, 5), - } - b.Reset(base) - return b -} - -// Reset clears all current state for the builder. -func (b *Builder) Reset(base Labels) { - b.base = base - b.del = b.del[:0] - b.add = b.add[:0] - for i := 0; i < len(base.data); { - var lName, lValue string - lName, i = decodeString(base.data, i) - lValue, i = decodeString(base.data, i) - if lValue == "" { - b.del = append(b.del, lName) - } - } -} - -// Del deletes the label of the given name. -func (b *Builder) Del(ns ...string) *Builder { - for _, n := range ns { - for i, a := range b.add { - if a.Name == n { - b.add = append(b.add[:i], b.add[i+1:]...) - } - } - b.del = append(b.del, n) - } - return b -} - -// Keep removes all labels from the base except those with the given names. -func (b *Builder) Keep(ns ...string) *Builder { -Outer: - for i := 0; i < len(b.base.data); { - var lName string - lName, i = decodeString(b.base.data, i) - _, i = decodeString(b.base.data, i) - for _, n := range ns { - if lName == n { - continue Outer - } - } - b.del = append(b.del, lName) - } - return b -} - -// Set the name/value pair as a label. A value of "" means delete that label. -func (b *Builder) Set(n, v string) *Builder { - if v == "" { - // Empty labels are the same as missing labels. - return b.Del(n) - } - for i, a := range b.add { - if a.Name == n { - b.add[i].Value = v - return b - } - } - b.add = append(b.add, Label{Name: n, Value: v}) - - return b -} - -func (b *Builder) Get(n string) string { - // Del() removes entries from .add but Set() does not remove from .del, so check .add first. - for _, a := range b.add { - if a.Name == n { - return a.Value - } - } - if slices.Contains(b.del, n) { - return "" - } - return b.base.Get(n) -} - -// Range calls f on each label in the Builder. -func (b *Builder) Range(f func(l Label)) { - // Stack-based arrays to avoid heap allocation in most cases. 
- var addStack [128]Label - var delStack [128]string - // Take a copy of add and del, so they are unaffected by calls to Set() or Del(). - origAdd, origDel := append(addStack[:0], b.add...), append(delStack[:0], b.del...) - b.base.Range(func(l Label) { - if !slices.Contains(origDel, l.Name) && !contains(origAdd, l.Name) { - f(l) - } - }) - for _, a := range origAdd { - f(a) - } -} - -func contains(s []Label, n string) bool { - for _, a := range s { - if a.Name == n { - return true - } - } - return false -} - // Labels returns the labels from the builder. // If no modifications were made, the original labels are returned. func (b *Builder) Labels() Labels { From 7e2c6fc8f3e0e42927325bdac623ea848575a66e Mon Sep 17 00:00:00 2001 From: wangqing Date: Mon, 20 Nov 2023 17:57:43 +0800 Subject: [PATCH 60/66] fix: The automatically generated file is inconsistent with the file in the code warehouse reference: https://github.com/prometheus/prometheus/commit/3ef153b00cdd8842b49b5abe721418fa5e5de9e0 Signed-off-by: wangqing --- plugins.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins.yml b/plugins.yml index c10dabddb6..c7b9d297d0 100644 --- a/plugins.yml +++ b/plugins.yml @@ -18,5 +18,6 @@ - github.com/prometheus/prometheus/discovery/scaleway - github.com/prometheus/prometheus/discovery/triton - github.com/prometheus/prometheus/discovery/uyuni +- github.com/prometheus/prometheus/discovery/vultr - github.com/prometheus/prometheus/discovery/xds - github.com/prometheus/prometheus/discovery/zookeeper From 870627fbedb4dbe2a86eeb8c8795b9b2a0abb7f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Mierzwa?= Date: Mon, 20 Nov 2023 12:02:53 +0000 Subject: [PATCH 61/66] Add enable_compression scrape config option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently Prometheus will always request gzip compression from the target when sending scrape requests. 
HTTP compression does reduce the amount of bytes sent over the wire and so is often desirable. The downside of compression is that it requires extra resources - cpu & memory. This also affects the resource usage on the target since it has to compress the response before sending it to Prometheus. This change adds a new option to the scrape job configuration block: enable_compression. The default is true so it remains the same as current Prometheus behaviour. Setting this option to false allows users to disable compression between Prometheus and the scraped target, which will require more bandwidth but it lowers the resource usage of both Prometheus and the target. Fixes #12319. Signed-off-by: Łukasz Mierzwa --- config/config.go | 3 + config/config_test.go | 55 ++++++++++- ...scrape_config_disable_compression.good.yml | 5 + docs/configuration/configuration.md | 4 + scrape/scrape.go | 47 ++++++--- scrape/scrape_test.go | 96 ++++++++++++++++++- 6 files changed, 192 insertions(+), 18 deletions(-) create mode 100644 config/testdata/scrape_config_disable_compression.good.yml diff --git a/config/config.go b/config/config.go index 4c73f6c496..b832ac9a17 100644 --- a/config/config.go +++ b/config/config.go @@ -158,6 +158,7 @@ var ( HonorLabels: false, HonorTimestamps: true, HTTPClientConfig: config.DefaultHTTPClientConfig, + EnableCompression: true, } // DefaultAlertmanagerConfig is the default alertmanager configuration. @@ -582,6 +583,8 @@ type ScrapeConfig struct { MetricsPath string `yaml:"metrics_path,omitempty"` // The URL scheme with which to fetch metrics from targets. Scheme string `yaml:"scheme,omitempty"` + // Indicator whether to request compressed response from the target. + EnableCompression bool `yaml:"enable_compression"` // An uncompressed response body larger than this many bytes will cause the // scrape to fail. 0 means no limit. 
BodySizeLimit units.Base2Bytes `yaml:"body_size_limit,omitempty"` diff --git a/config/config_test.go b/config/config_test.go index 12c9891b04..408622cd5a 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -186,6 +186,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -288,6 +289,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(50 * time.Second), ScrapeTimeout: model.Duration(5 * time.Second), + EnableCompression: true, BodySizeLimit: 10 * units.MiB, SampleLimit: 1000, TargetLimit: 35, @@ -384,6 +386,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -438,6 +441,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second), + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -470,6 +474,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -508,6 +513,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -546,6 +552,7 @@ var expectedConf = &Config{ 
HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -573,6 +580,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -609,6 +617,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -642,6 +651,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -682,6 +692,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -712,6 +723,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -745,6 +757,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ 
-771,6 +784,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -800,6 +814,7 @@ var expectedConf = &Config{ HonorTimestamps: false, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -829,6 +844,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -858,6 +874,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -884,6 +901,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -918,6 +936,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -951,6 +970,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: 
globSampleLimit, TargetLimit: globTargetLimit, @@ -980,6 +1000,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -1009,6 +1030,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -1042,6 +1064,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -1078,6 +1101,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -1133,6 +1157,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -1159,6 +1184,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -1196,6 +1222,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + 
EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -1239,6 +1266,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -1273,6 +1301,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -1301,6 +1330,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -1332,6 +1362,7 @@ var expectedConf = &Config{ HonorTimestamps: true, ScrapeInterval: model.Duration(15 * time.Second), ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, BodySizeLimit: globBodySizeLimit, SampleLimit: globSampleLimit, TargetLimit: globTargetLimit, @@ -2060,9 +2091,10 @@ func TestGetScrapeConfigs(t *testing.T) { ScrapeTimeout: scrapeTimeout, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, - MetricsPath: "/metrics", - Scheme: "http", - HTTPClientConfig: config.DefaultHTTPClientConfig, + MetricsPath: "/metrics", + Scheme: "http", + EnableCompression: true, + HTTPClientConfig: config.DefaultHTTPClientConfig, ServiceDiscoveryConfigs: discovery.Configs{ discovery.StaticConfig{ { @@ -2118,6 +2150,8 @@ func TestGetScrapeConfigs(t *testing.T) { MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, + EnableCompression: true, + HTTPClientConfig: config.HTTPClientConfig{ TLSConfig: config.TLSConfig{ CertFile: 
filepath.FromSlash("testdata/scrape_configs/valid_cert_file"), @@ -2158,6 +2192,8 @@ func TestGetScrapeConfigs(t *testing.T) { MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, + EnableCompression: true, + ServiceDiscoveryConfigs: discovery.Configs{ &vultr.SDConfig{ HTTPClientConfig: config.HTTPClientConfig{ @@ -2210,3 +2246,16 @@ func kubernetesSDHostURL() config.URL { tURL, _ := url.Parse("https://localhost:1234") return config.URL{URL: tURL} } + +func TestScrapeConfigDisableCompression(t *testing.T) { + want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, false, log.NewNopLogger()) + require.NoError(t, err) + + out, err := yaml.Marshal(want) + + require.NoError(t, err) + got := &Config{} + require.NoError(t, yaml.UnmarshalStrict(out, got)) + + require.Equal(t, false, got.ScrapeConfigs[0].EnableCompression) +} diff --git a/config/testdata/scrape_config_disable_compression.good.yml b/config/testdata/scrape_config_disable_compression.good.yml new file mode 100644 index 0000000000..c6320f7dba --- /dev/null +++ b/config/testdata/scrape_config_disable_compression.good.yml @@ -0,0 +1,5 @@ +scrape_configs: + - job_name: prometheus + static_configs: + - targets: ['localhost:8080'] + enable_compression: false diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index dc4ea12e75..e9ff2b8f2c 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -237,6 +237,10 @@ job_name: params: [ : [, ...] ] +# If enable_compression is set to "false", Prometheus will request uncompressed +# response from the scraped target. +[ enable_compression: | default = true ] + # Sets the `Authorization` header on every scrape request with the # configured username and password. # password and password_file are mutually exclusive. 
diff --git a/scrape/scrape.go b/scrape/scrape.go index 1bcc333d82..983bee8378 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -109,6 +109,7 @@ type scrapeLoopOptions struct { scrapeClassicHistograms bool mrc []*relabel.Config cache *scrapeCache + enableCompression bool } const maxAheadTime = 10 * time.Minute @@ -163,6 +164,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed offsetSeed, opts.honorTimestamps, opts.trackTimestampsStaleness, + opts.enableCompression, opts.sampleLimit, opts.bucketLimit, opts.labelLimits, @@ -275,6 +277,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error { } honorLabels = sp.config.HonorLabels honorTimestamps = sp.config.HonorTimestamps + enableCompression = sp.config.EnableCompression trackTimestampsStaleness = sp.config.TrackTimestampsStaleness mrc = sp.config.MetricRelabelConfigs ) @@ -295,11 +298,12 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error { interval, timeout, err := t.intervalAndTimeout(interval, timeout) var ( s = &targetScraper{ - Target: t, - client: sp.client, - timeout: timeout, - bodySizeLimit: bodySizeLimit, - acceptHeader: acceptHeader(cfg.ScrapeProtocols), + Target: t, + client: sp.client, + timeout: timeout, + bodySizeLimit: bodySizeLimit, + acceptHeader: acceptHeader(cfg.ScrapeProtocols), + acceptEncodingHeader: acceptEncodingHeader(enableCompression), } newLoop = sp.newLoop(scrapeLoopOptions{ target: t, @@ -309,6 +313,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error { labelLimits: labelLimits, honorLabels: honorLabels, honorTimestamps: honorTimestamps, + enableCompression: enableCompression, trackTimestampsStaleness: trackTimestampsStaleness, mrc: mrc, cache: cache, @@ -403,6 +408,7 @@ func (sp *scrapePool) sync(targets []*Target) { } honorLabels = sp.config.HonorLabels honorTimestamps = sp.config.HonorTimestamps + enableCompression = sp.config.EnableCompression trackTimestampsStaleness = sp.config.TrackTimestampsStaleness mrc 
= sp.config.MetricRelabelConfigs scrapeClassicHistograms = sp.config.ScrapeClassicHistograms @@ -419,12 +425,13 @@ func (sp *scrapePool) sync(targets []*Target) { var err error interval, timeout, err = t.intervalAndTimeout(interval, timeout) s := &targetScraper{ - Target: t, - client: sp.client, - timeout: timeout, - bodySizeLimit: bodySizeLimit, - acceptHeader: acceptHeader(sp.config.ScrapeProtocols), - metrics: sp.metrics, + Target: t, + client: sp.client, + timeout: timeout, + bodySizeLimit: bodySizeLimit, + acceptHeader: acceptHeader(sp.config.ScrapeProtocols), + acceptEncodingHeader: acceptEncodingHeader(enableCompression), + metrics: sp.metrics, } l := sp.newLoop(scrapeLoopOptions{ target: t, @@ -434,6 +441,7 @@ func (sp *scrapePool) sync(targets []*Target) { labelLimits: labelLimits, honorLabels: honorLabels, honorTimestamps: honorTimestamps, + enableCompression: enableCompression, trackTimestampsStaleness: trackTimestampsStaleness, mrc: mrc, interval: interval, @@ -647,8 +655,9 @@ type targetScraper struct { gzipr *gzip.Reader buf *bufio.Reader - bodySizeLimit int64 - acceptHeader string + bodySizeLimit int64 + acceptHeader string + acceptEncodingHeader string metrics *scrapeMetrics } @@ -670,6 +679,13 @@ func acceptHeader(sps []config.ScrapeProtocol) string { return strings.Join(vals, ",") } +func acceptEncodingHeader(enableCompression bool) string { + if enableCompression { + return "gzip" + } + return "identity" +} + var UserAgent = fmt.Sprintf("Prometheus/%s", version.Version) func (s *targetScraper) scrape(ctx context.Context) (*http.Response, error) { @@ -679,7 +695,7 @@ func (s *targetScraper) scrape(ctx context.Context) (*http.Response, error) { return nil, err } req.Header.Add("Accept", s.acceptHeader) - req.Header.Add("Accept-Encoding", "gzip") + req.Header.Add("Accept-Encoding", s.acceptEncodingHeader) req.Header.Set("User-Agent", UserAgent) req.Header.Set("X-Prometheus-Scrape-Timeout-Seconds", strconv.FormatFloat(s.timeout.Seconds(), 'f', -1, 
64)) @@ -765,6 +781,7 @@ type scrapeLoop struct { offsetSeed uint64 honorTimestamps bool trackTimestampsStaleness bool + enableCompression bool forcedErr error forcedErrMtx sync.Mutex sampleLimit int @@ -1055,6 +1072,7 @@ func newScrapeLoop(ctx context.Context, offsetSeed uint64, honorTimestamps bool, trackTimestampsStaleness bool, + enableCompression bool, sampleLimit int, bucketLimit int, labelLimits *labelLimits, @@ -1102,6 +1120,7 @@ func newScrapeLoop(ctx context.Context, appenderCtx: appenderCtx, honorTimestamps: honorTimestamps, trackTimestampsStaleness: trackTimestampsStaleness, + enableCompression: enableCompression, sampleLimit: sampleLimit, bucketLimit: bucketLimit, labelLimits: labelLimits, diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 522d2e1f86..238e90c204 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -651,6 +651,7 @@ func TestScrapeLoopStopBeforeRun(t *testing.T) { nil, nil, 0, true, false, + true, 0, 0, nil, 1, @@ -726,6 +727,7 @@ func TestScrapeLoopStop(t *testing.T) { 0, true, false, + true, 0, 0, nil, 10*time.Millisecond, @@ -805,6 +807,7 @@ func TestScrapeLoopRun(t *testing.T) { 0, true, false, + true, 0, 0, nil, time.Second, @@ -863,6 +866,7 @@ func TestScrapeLoopRun(t *testing.T) { 0, true, false, + true, 0, 0, nil, time.Second, @@ -925,6 +929,7 @@ func TestScrapeLoopForcedErr(t *testing.T) { 0, true, false, + true, 0, 0, nil, time.Second, @@ -986,6 +991,7 @@ func TestScrapeLoopMetadata(t *testing.T) { 0, true, false, + true, 0, 0, nil, 0, @@ -1046,6 +1052,7 @@ func simpleTestScrapeLoop(t testing.TB) (context.Context, *scrapeLoop) { 0, true, false, + true, 0, 0, nil, 0, @@ -1109,6 +1116,7 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { 0, true, false, + true, 0, 0, nil, 0, @@ -1190,6 +1198,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { 0, true, false, + true, 0, 0, nil, 10*time.Millisecond, @@ -1256,6 +1265,7 @@ func 
TestScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T) { 0, true, false, + true, 0, 0, nil, 10*time.Millisecond, @@ -1325,6 +1335,7 @@ func TestScrapeLoopCache(t *testing.T) { 0, true, false, + true, 0, 0, nil, 10*time.Millisecond, @@ -1411,6 +1422,7 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { 0, true, false, + true, 0, 0, nil, 10*time.Millisecond, @@ -1528,6 +1540,7 @@ func TestScrapeLoopAppend(t *testing.T) { 0, true, false, + true, 0, 0, nil, 0, @@ -1626,7 +1639,7 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) { }, nil, func(ctx context.Context) storage.Appender { return app }, - nil, 0, true, false, 0, 0, nil, 0, 0, false, false, false, nil, false, newTestScrapeMetrics(t), + nil, 0, true, false, true, 0, 0, nil, 0, 0, false, false, false, nil, false, newTestScrapeMetrics(t), ) slApp := sl.appender(context.Background()) _, _, _, err := sl.append(slApp, []byte(tc.exposedLabels), "", time.Date(2000, 1, 1, 1, 0, 0, 0, time.UTC)) @@ -1658,6 +1671,7 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { 0, true, false, + true, 0, 0, nil, 0, @@ -1719,6 +1733,7 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) { 0, true, false, + true, app.limit, 0, nil, 0, @@ -1799,6 +1814,7 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { 0, true, false, + true, app.limit, 0, nil, 0, @@ -1900,6 +1916,7 @@ func TestScrapeLoop_ChangingMetricString(t *testing.T) { 0, true, false, + true, 0, 0, nil, 0, @@ -1951,6 +1968,7 @@ func TestScrapeLoopAppendStaleness(t *testing.T) { 0, true, false, + true, 0, 0, nil, 0, @@ -2005,6 +2023,7 @@ func TestScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T) { 0, true, false, + true, 0, 0, nil, 0, @@ -2049,6 +2068,7 @@ func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { 0, true, true, + true, 0, 0, nil, 0, @@ -2421,6 +2441,7 @@ metric: < 0, true, false, + true, 0, 0, nil, 0, @@ -2511,6 +2532,7 @@ func TestScrapeLoopAppendExemplarSeries(t 
*testing.T) { 0, true, false, + true, 0, 0, nil, 0, @@ -2566,6 +2588,7 @@ func TestScrapeLoopRunReportsTargetDownOnScrapeError(t *testing.T) { 0, true, false, + true, 0, 0, nil, 10*time.Millisecond, @@ -2605,6 +2628,7 @@ func TestScrapeLoopRunReportsTargetDownOnInvalidUTF8(t *testing.T) { 0, true, false, + true, 0, 0, nil, 10*time.Millisecond, @@ -2657,6 +2681,7 @@ func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T 0, true, false, + true, 0, 0, nil, 0, @@ -2705,6 +2730,7 @@ func TestScrapeLoopOutOfBoundsTimeError(t *testing.T) { 0, true, false, + true, 0, 0, nil, 0, @@ -2997,6 +3023,7 @@ func TestScrapeLoop_RespectTimestamps(t *testing.T) { nil, 0, true, false, + true, 0, 0, nil, 0, @@ -3041,6 +3068,7 @@ func TestScrapeLoop_DiscardTimestamps(t *testing.T) { nil, 0, false, false, + true, 0, 0, nil, 0, @@ -3084,6 +3112,7 @@ func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) { 0, true, false, + true, 0, 0, nil, 0, @@ -3145,6 +3174,7 @@ func TestScrapeLoopDiscardUnnamedMetrics(t *testing.T) { 0, true, false, + true, 0, 0, nil, 0, @@ -3411,6 +3441,7 @@ func TestScrapeAddFast(t *testing.T) { 0, true, false, + true, 0, 0, nil, 0, @@ -3500,6 +3531,7 @@ func TestScrapeReportSingleAppender(t *testing.T) { 0, true, false, + true, 0, 0, nil, 10*time.Millisecond, @@ -3705,6 +3737,7 @@ func TestScrapeLoopLabelLimit(t *testing.T) { 0, true, false, + true, 0, 0, &test.labelLimits, 0, @@ -3911,6 +3944,7 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t * 0, true, true, + true, 0, 0, nil, 10*time.Millisecond, @@ -3956,3 +3990,63 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t * require.True(t, value.IsStaleNaN(appender.resultFloats[6].f), "Appended second sample not as expected. 
Wanted: stale NaN Got: %x", math.Float64bits(appender.resultFloats[6].f)) } + +func TestScrapeLoopCompression(t *testing.T) { + simpleStorage := teststorage.New(t) + defer simpleStorage.Close() + + metricsText := makeTestMetrics(10) + + for _, tc := range []struct { + enableCompression bool + acceptEncoding string + }{ + { + enableCompression: true, + acceptEncoding: "gzip", + }, + { + enableCompression: false, + acceptEncoding: "identity", + }, + } { + t.Run(fmt.Sprintf("compression=%v,acceptEncoding=%s", tc.enableCompression, tc.acceptEncoding), func(t *testing.T) { + scraped := make(chan bool) + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, tc.acceptEncoding, r.Header.Get("Accept-Encoding"), "invalid value of the Accept-Encoding header") + fmt.Fprint(w, metricsText) + close(scraped) + })) + defer ts.Close() + + config := &config.ScrapeConfig{ + JobName: "test", + SampleLimit: 100, + Scheme: "http", + ScrapeInterval: model.Duration(100 * time.Millisecond), + ScrapeTimeout: model.Duration(100 * time.Millisecond), + EnableCompression: tc.enableCompression, + } + + sp, err := newScrapePool(config, simpleStorage, 0, nil, &Options{}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + require.Equal(t, 1, len(sp.ActiveTargets())) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped") + case <-scraped: + } + }) + } +} From b37258c99b901490ccf22f85f472ba2ba8c8be33 Mon Sep 17 00:00:00 2001 From: Etourneau Gwenn Date: Tue, 21 Nov 2023 20:59:17 +0900 Subject: [PATCH 62/66] Added Caching of network interface for Azure (#12622) * Added Caching of network interface for Azure Signed-off-by: Etourneau Gwenn * Rename Counter for Azure cache Signed-off-by: Etourneau Gwenn * 
Format with goimports Signed-off-by: Etourneau Gwenn * Updated duration comparaison Enabled cache by default with 5x the default refresh time Signed-off-by: Etourneau Gwenn * Change random function Signed-off-by: Etourneau Gwenn * Remove refresh interval Signed-off-by: Etourneau Gwenn * Remove from config as well Signed-off-by: Etourneau Gwenn * Reformat config_test Removed uneeded error Signed-off-by: Etourneau Gwenn --------- Signed-off-by: Etourneau Gwenn --- discovery/azure/azure.go | 55 +++++++++++++++++++++++++++++++++------- go.mod | 1 + go.sum | 2 ++ 3 files changed, 49 insertions(+), 9 deletions(-) diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 23b3cb8c4d..675ff7c217 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "math/rand" "net" "net/http" "strings" @@ -30,10 +31,13 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2" + cache "github.com/Code-Hex/go-generics-cache" + "github.com/Code-Hex/go-generics-cache/policy/lru" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -80,6 +84,11 @@ var ( Name: "prometheus_sd_azure_failures_total", Help: "Number of Azure service discovery refresh failures.", }) + cacheHitCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "prometheus_sd_azure_cache_hit_total", + Help: "Number of cache hit during refresh.", + }) ) var environments = map[string]cloud.Configuration{ @@ -105,6 +114,7 @@ func CloudConfigurationFromName(name string) (cloud.Configuration, error) { func init() { discovery.RegisterConfig(&SDConfig{}) prometheus.MustRegister(failuresCount) + 
prometheus.MustRegister(cacheHitCount) } // SDConfig is the configuration for Azure based service discovery. @@ -145,7 +155,6 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { if err != nil { return err } - if err = validateAuthParam(c.SubscriptionID, "subscription_id"); err != nil { return err } @@ -174,6 +183,7 @@ type Discovery struct { logger log.Logger cfg *SDConfig port int + cache *cache.Cache[string, *armnetwork.Interface] } // NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets. @@ -181,17 +191,21 @@ func NewDiscovery(cfg *SDConfig, logger log.Logger) *Discovery { if logger == nil { logger = log.NewNopLogger() } + l := cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5000))) d := &Discovery{ cfg: cfg, port: cfg.Port, logger: logger, + cache: l, } + d.Discovery = refresh.NewDiscovery( logger, "azure", time.Duration(cfg.RefreshInterval), d.refresh, ) + return d } @@ -385,15 +399,22 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { // Get the IP address information via separate call to the network provider. for _, nicID := range vm.NetworkInterfaces { - networkInterface, err := client.getNetworkInterfaceByID(ctx, nicID) - if err != nil { - if errors.Is(err, errorNotFound) { - level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) - } else { - ch <- target{labelSet: nil, err: err} + var networkInterface *armnetwork.Interface + if v, ok := d.getFromCache(nicID); ok { + networkInterface = v + cacheHitCount.Add(1) + } else { + networkInterface, err = client.getNetworkInterfaceByID(ctx, nicID) + if err != nil { + if errors.Is(err, errorNotFound) { + level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) + } else { + ch <- target{labelSet: nil, err: err} + } + // Get out of this routine because we cannot continue without a network interface. 
+ return } - // Get out of this routine because we cannot continue without a network interface. - return + d.addToCache(nicID, networkInterface) } if networkInterface.Properties == nil { @@ -628,3 +649,19 @@ func (client *azureClient) getNetworkInterfaceByID(ctx context.Context, networkI return &resp.Interface, nil } + +// addToCache will add the network interface information for the specified nicID +func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) { + random := rand.Int63n(int64(time.Duration(d.cfg.RefreshInterval * 3).Seconds())) + rs := time.Duration(random) * time.Second + exptime := time.Duration(d.cfg.RefreshInterval*10) + rs + d.cache.Set(nicID, netInt, cache.WithExpiration(exptime)) + level.Debug(d.logger).Log("msg", "Adding nic", "nic", nicID, "time", exptime.Seconds()) +} + +// getFromCache will get the network Interface for the specified nicID +// If the cache is disabled nothing will happen +func (d *Discovery) getFromCache(nicID string) (*armnetwork.Interface, bool) { + net, found := d.cache.Get(nicID) + return net, found +} diff --git a/go.mod b/go.mod index 629dd11472..6cc2d02370 100644 --- a/go.mod +++ b/go.mod @@ -110,6 +110,7 @@ require ( ) require ( + github.com/Code-Hex/go-generics-cache v1.3.1 github.com/Microsoft/go-winio v0.6.1 // indirect github.com/armon/go-metrics v0.4.1 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect diff --git a/go.sum b/go.sum index e8f2013a70..01804516bb 100644 --- a/go.sum +++ b/go.sum @@ -54,6 +54,8 @@ github.com/AzureAD/microsoft-authentication-library-for-go v1.1.1 h1:WpB/QDNLpMw github.com/AzureAD/microsoft-authentication-library-for-go v1.1.1/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/Code-Hex/go-generics-cache v1.3.1 
h1:i8rLwyhoyhaerr7JpjtYjJZUcCbWOdiYO3fZXLiEC4g= +github.com/Code-Hex/go-generics-cache v1.3.1/go.mod h1:qxcC9kRVrct9rHeiYpFWSoW1vxyillCVzX13KZG8dl4= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= From 2329fba0e59d8c38f940d3c5a3e8954268aa9ec1 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 23 Nov 2023 20:07:23 +1100 Subject: [PATCH 63/66] Fix linting issues in comments (#13178) Signed-off-by: Charles Korn --- discovery/azure/azure.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 675ff7c217..faccadcf85 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -650,7 +650,7 @@ func (client *azureClient) getNetworkInterfaceByID(ctx context.Context, networkI return &resp.Interface, nil } -// addToCache will add the network interface information for the specified nicID +// addToCache will add the network interface information for the specified nicID. func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) { random := rand.Int63n(int64(time.Duration(d.cfg.RefreshInterval * 3).Seconds())) rs := time.Duration(random) * time.Second @@ -660,7 +660,7 @@ func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) { } // getFromCache will get the network Interface for the specified nicID -// If the cache is disabled nothing will happen +// If the cache is disabled nothing will happen. 
func (d *Discovery) getFromCache(nicID string) (*armnetwork.Interface, bool) { net, found := d.cache.Get(nicID) return net, found From 0102425af102bddb7be64af9abb93af7ba5193ea Mon Sep 17 00:00:00 2001 From: Paulin Todev Date: Thu, 23 Nov 2023 11:24:08 +0000 Subject: [PATCH 64/66] Use only one scrapeMetrics object per test. (#13051) The scrape loop and scrape cache should use the same instance. This brings the tests' behavior more in line with production. Signed-off-by: Paulin Todev --- scrape/scrape_test.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 238e90c204..5e4f3f30c7 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -792,8 +792,9 @@ func TestScrapeLoopRun(t *testing.T) { signal = make(chan struct{}, 1) errc = make(chan error) - scraper = &testScraper{} - app = func(ctx context.Context) storage.Appender { return &nopAppender{} } + scraper = &testScraper{} + app = func(ctx context.Context) storage.Appender { return &nopAppender{} } + scrapeMetrics = newTestScrapeMetrics(t) ) ctx, cancel := context.WithCancel(context.Background()) @@ -817,7 +818,7 @@ func TestScrapeLoopRun(t *testing.T) { false, nil, false, - newTestScrapeMetrics(t), + scrapeMetrics, ) // The loop must terminate during the initial offset if the context @@ -876,7 +877,7 @@ func TestScrapeLoopRun(t *testing.T) { false, nil, false, - newTestScrapeMetrics(t), + scrapeMetrics, ) go func() { @@ -974,9 +975,10 @@ func TestScrapeLoopForcedErr(t *testing.T) { func TestScrapeLoopMetadata(t *testing.T) { var ( - signal = make(chan struct{}) - scraper = &testScraper{} - cache = newScrapeCache(newTestScrapeMetrics(t)) + signal = make(chan struct{}) + scraper = &testScraper{} + scrapeMetrics = newTestScrapeMetrics(t) + cache = newScrapeCache(scrapeMetrics) ) defer close(signal) @@ -1001,7 +1003,7 @@ func TestScrapeLoopMetadata(t *testing.T) { false, nil, false, - newTestScrapeMetrics(t), + 
scrapeMetrics, ) defer cancel() From 35a15e8f04b3b5716da73c0f4faf1c2be34b79a0 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Thu, 23 Nov 2023 15:09:17 +0100 Subject: [PATCH 65/66] Add benchmark for native histograms (#13160) * Add benchmark for native histograms This commit adds a PromQL benchmark for queries on native histograms. Signed-off-by: Filip Petkovski --- promql/bench_test.go | 95 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/promql/bench_test.go b/promql/bench_test.go index 8e443b5a6a..13eba3714e 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -21,9 +21,11 @@ import ( "testing" "time" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/teststorage" ) @@ -269,6 +271,99 @@ func BenchmarkRangeQuery(b *testing.B) { } } +func BenchmarkNativeHistograms(b *testing.B) { + testStorage := teststorage.New(b) + defer testStorage.Close() + + app := testStorage.Appender(context.TODO()) + if err := generateNativeHistogramSeries(app, 3000); err != nil { + b.Fatal(err) + } + if err := app.Commit(); err != nil { + b.Fatal(err) + } + + start := time.Unix(0, 0) + end := start.Add(2 * time.Hour) + step := time.Second * 30 + + cases := []struct { + name string + query string + }{ + { + name: "sum", + query: "sum(native_histogram_series)", + }, + { + name: "sum rate", + query: "sum(rate(native_histogram_series[1m]))", + }, + } + + opts := EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 50000000, + Timeout: 100 * time.Second, + EnableAtModifier: true, + EnableNegativeOffset: true, + } + + b.ResetTimer() + b.ReportAllocs() + + for _, tc := range cases { + b.Run(tc.name, func(b *testing.B) { + ng := NewEngine(opts) + for i := 0; i < b.N; i++ { + qry, err := 
ng.NewRangeQuery(context.Background(), testStorage, nil, tc.query, start, end, step) + if err != nil { + b.Fatal(err) + } + if result := qry.Exec(context.Background()); result.Err != nil { + b.Fatal(result.Err) + } + } + }) + } +} + +func generateNativeHistogramSeries(app storage.Appender, numSeries int) error { + commonLabels := []string{labels.MetricName, "native_histogram_series", "foo", "bar"} + series := make([][]*histogram.Histogram, numSeries) + for i := range series { + series[i] = tsdbutil.GenerateTestHistograms(2000) + } + higherSchemaHist := &histogram.Histogram{ + Schema: 3, + PositiveSpans: []histogram.Span{ + {Offset: -5, Length: 2}, // -5 -4 + {Offset: 2, Length: 3}, // -1 0 1 + {Offset: 2, Length: 2}, // 4 5 + }, + PositiveBuckets: []int64{1, 2, -2, 1, -1, 0, 3}, + Count: 13, + } + for sid, histograms := range series { + seriesLabels := labels.FromStrings(append(commonLabels, "h", strconv.Itoa(sid))...) + for i := range histograms { + ts := time.Unix(int64(i*15), 0).UnixMilli() + if i == 0 { + // Inject a histogram with a higher schema. + if _, err := app.AppendHistogram(0, seriesLabels, ts, higherSchemaHist, nil); err != nil { + return err + } + } + if _, err := app.AppendHistogram(0, seriesLabels, ts, histograms[i], nil); err != nil { + return err + } + } + } + + return nil +} + func BenchmarkParser(b *testing.B) { cases := []string{ "a", From 59844498f7b12f16c7f004aa951bbb14cdb83991 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 24 Nov 2023 22:38:38 +1100 Subject: [PATCH 66/66] Fix issue where queries can fail or omit OOO samples if OOO head compaction occurs between creating a querier and reading chunks (#13115) * Add failing test. Signed-off-by: Charles Korn * Don't run OOO head garbage collection while reads are running. Signed-off-by: Charles Korn * Add further test cases for different order of operations. Signed-off-by: Charles Korn * Ensure all queriers are closed if `DB.blockChunkQuerierForRange()` fails. 
Signed-off-by: Charles Korn * Ensure all queriers are closed if `DB.Querier()` fails. Signed-off-by: Charles Korn * Invert error handling in `DB.Querier()` and `DB.blockChunkQuerierForRange()` to make it clearer Signed-off-by: Charles Korn * Ensure that queries that touch OOO data can't block OOO head garbage collection forever. Signed-off-by: Charles Korn * Address PR feedback: fix parameter name in comment Co-authored-by: Jesus Vazquez Signed-off-by: Charles Korn * Address PR feedback: use `lastGarbageCollectedMmapRef` Signed-off-by: Charles Korn * Address PR feedback: ensure pending reads are cleaned up if creating an OOO querier fails Signed-off-by: Charles Korn --------- Signed-off-by: Charles Korn Signed-off-by: Charles Korn Co-authored-by: Jesus Vazquez --- tsdb/db.go | 128 +++++++++++------- tsdb/db_test.go | 259 +++++++++++++++++++++++++++++++++++++ tsdb/head.go | 25 +++- tsdb/ooo_head.go | 18 ++- tsdb/ooo_head_read.go | 44 ++++--- tsdb/ooo_head_read_test.go | 17 ++- tsdb/ooo_isolation.go | 79 +++++++++++ tsdb/ooo_isolation_test.go | 60 +++++++++ tsdb/querier_test.go | 2 +- 9 files changed, 546 insertions(+), 86 deletions(-) create mode 100644 tsdb/ooo_isolation.go create mode 100644 tsdb/ooo_isolation_test.go diff --git a/tsdb/db.go b/tsdb/db.go index c4c05e3901..2e3801a9e0 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -203,10 +203,14 @@ type DB struct { compactor Compactor blocksToDelete BlocksToDeleteFunc - // Mutex for that must be held when modifying the general block layout. + // Mutex for that must be held when modifying the general block layout or lastGarbageCollectedMmapRef. mtx sync.RWMutex blocks []*Block + // The last OOO chunk that was compacted and written to disk. New queriers must not read chunks less + // than or equal to this reference, as these chunks could be garbage collected at any time. 
+ lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef + head *Head compactc chan struct{} @@ -1243,6 +1247,20 @@ func (db *DB) compactOOOHead(ctx context.Context) error { lastWBLFile, minOOOMmapRef := oooHead.LastWBLFile(), oooHead.LastMmapRef() if lastWBLFile != 0 || minOOOMmapRef != 0 { + if minOOOMmapRef != 0 { + // Ensure that no more queriers are created that will reference chunks we're about to garbage collect. + // truncateOOO waits for any existing queriers that reference chunks we're about to garbage collect to + // complete before running garbage collection, so we don't need to do that here. + // + // We take mtx to ensure that Querier() and ChunkQuerier() don't miss blocks: without this, they could + // capture the list of blocks before the call to reloadBlocks() above runs, but then capture + // lastGarbageCollectedMmapRef after we update it here, and therefore not query either the blocks we've just + // written or the head chunks those blocks were created from. + db.mtx.Lock() + db.lastGarbageCollectedMmapRef = minOOOMmapRef + db.mtx.Unlock() + } + if err := db.head.truncateOOO(lastWBLFile, minOOOMmapRef); err != nil { return errors.Wrap(err, "truncate ooo wbl") } @@ -1869,7 +1887,7 @@ func (db *DB) Snapshot(dir string, withHead bool) error { } // Querier returns a new querier over the data partition for the given time range. -func (db *DB) Querier(mint, maxt int64) (storage.Querier, error) { +func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) { var blocks []BlockReader db.mtx.RLock() @@ -1880,11 +1898,23 @@ func (db *DB) Querier(mint, maxt int64) (storage.Querier, error) { blocks = append(blocks, b) } } - var inOrderHeadQuerier storage.Querier + + blockQueriers := make([]storage.Querier, 0, len(blocks)+2) // +2 to allow for possible in-order and OOO head queriers + + defer func() { + if err != nil { + // If we fail, all previously opened queriers must be closed. 
+ for _, q := range blockQueriers { + // TODO(bwplotka): Handle error. + _ = q.Close() + } + } + }() + if maxt >= db.head.MinTime() { rh := NewRangeHead(db.head, mint, maxt) var err error - inOrderHeadQuerier, err = NewBlockQuerier(rh, mint, maxt) + inOrderHeadQuerier, err := NewBlockQuerier(rh, mint, maxt) if err != nil { return nil, errors.Wrapf(err, "open block querier for head %s", rh) } @@ -1906,44 +1936,40 @@ func (db *DB) Querier(mint, maxt int64) (storage.Querier, error) { return nil, errors.Wrapf(err, "open block querier for head while getting new querier %s", rh) } } + + if inOrderHeadQuerier != nil { + blockQueriers = append(blockQueriers, inOrderHeadQuerier) + } } - var outOfOrderHeadQuerier storage.Querier if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) { - rh := NewOOORangeHead(db.head, mint, maxt) + rh := NewOOORangeHead(db.head, mint, maxt, db.lastGarbageCollectedMmapRef) var err error - outOfOrderHeadQuerier, err = NewBlockQuerier(rh, mint, maxt) + outOfOrderHeadQuerier, err := NewBlockQuerier(rh, mint, maxt) if err != nil { + // If NewBlockQuerier() failed, make sure to clean up the pending read created by NewOOORangeHead. + rh.isoState.Close() + return nil, errors.Wrapf(err, "open block querier for ooo head %s", rh) } - } - blockQueriers := make([]storage.Querier, 0, len(blocks)) - for _, b := range blocks { - q, err := NewBlockQuerier(b, mint, maxt) - if err == nil { - blockQueriers = append(blockQueriers, q) - continue - } - // If we fail, all previously opened queriers must be closed. - for _, q := range blockQueriers { - // TODO(bwplotka): Handle error. 
- _ = q.Close() - } - return nil, errors.Wrapf(err, "open querier for block %s", b) - } - if inOrderHeadQuerier != nil { - blockQueriers = append(blockQueriers, inOrderHeadQuerier) - } - if outOfOrderHeadQuerier != nil { blockQueriers = append(blockQueriers, outOfOrderHeadQuerier) } + + for _, b := range blocks { + q, err := NewBlockQuerier(b, mint, maxt) + if err != nil { + return nil, errors.Wrapf(err, "open querier for block %s", b) + } + blockQueriers = append(blockQueriers, q) + } + return storage.NewMergeQuerier(blockQueriers, nil, storage.ChainedSeriesMerge), nil } // blockChunkQuerierForRange returns individual block chunk queriers from the persistent blocks, in-order head block, and the // out-of-order head block, overlapping with the given time range. -func (db *DB) blockChunkQuerierForRange(mint, maxt int64) ([]storage.ChunkQuerier, error) { +func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuerier, err error) { var blocks []BlockReader db.mtx.RLock() @@ -1954,11 +1980,22 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) ([]storage.ChunkQuerie blocks = append(blocks, b) } } - var inOrderHeadQuerier storage.ChunkQuerier + + blockQueriers := make([]storage.ChunkQuerier, 0, len(blocks)+2) // +2 to allow for possible in-order and OOO head queriers + + defer func() { + if err != nil { + // If we fail, all previously opened queriers must be closed. + for _, q := range blockQueriers { + // TODO(bwplotka): Handle error. 
+ _ = q.Close() + } + } + }() + if maxt >= db.head.MinTime() { rh := NewRangeHead(db.head, mint, maxt) - var err error - inOrderHeadQuerier, err = NewBlockChunkQuerier(rh, mint, maxt) + inOrderHeadQuerier, err := NewBlockChunkQuerier(rh, mint, maxt) if err != nil { return nil, errors.Wrapf(err, "open querier for head %s", rh) } @@ -1980,37 +2017,28 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) ([]storage.ChunkQuerie return nil, errors.Wrapf(err, "open querier for head while getting new querier %s", rh) } } + + if inOrderHeadQuerier != nil { + blockQueriers = append(blockQueriers, inOrderHeadQuerier) + } } - var outOfOrderHeadQuerier storage.ChunkQuerier if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) { - rh := NewOOORangeHead(db.head, mint, maxt) - var err error - outOfOrderHeadQuerier, err = NewBlockChunkQuerier(rh, mint, maxt) + rh := NewOOORangeHead(db.head, mint, maxt, db.lastGarbageCollectedMmapRef) + outOfOrderHeadQuerier, err := NewBlockChunkQuerier(rh, mint, maxt) if err != nil { return nil, errors.Wrapf(err, "open block chunk querier for ooo head %s", rh) } + + blockQueriers = append(blockQueriers, outOfOrderHeadQuerier) } - blockQueriers := make([]storage.ChunkQuerier, 0, len(blocks)) for _, b := range blocks { q, err := NewBlockChunkQuerier(b, mint, maxt) - if err == nil { - blockQueriers = append(blockQueriers, q) - continue + if err != nil { + return nil, errors.Wrapf(err, "open querier for block %s", b) } - // If we fail, all previously opened queriers must be closed. - for _, q := range blockQueriers { - // TODO(bwplotka): Handle error. 
- _ = q.Close() - } - return nil, errors.Wrapf(err, "open querier for block %s", b) - } - if inOrderHeadQuerier != nil { - blockQueriers = append(blockQueriers, inOrderHeadQuerier) - } - if outOfOrderHeadQuerier != nil { - blockQueriers = append(blockQueriers, outOfOrderHeadQuerier) + blockQueriers = append(blockQueriers, q) } return blockQueriers, nil diff --git a/tsdb/db_test.go b/tsdb/db_test.go index c7ea068d60..5728b49bd0 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -38,6 +38,7 @@ import ( "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" + "go.uber.org/atomic" "go.uber.org/goleak" "github.com/prometheus/prometheus/config" @@ -3611,6 +3612,264 @@ func testChunkQuerierShouldNotPanicIfHeadChunkIsTruncatedWhileReadingQueriedChun } } +func TestQuerierShouldNotFailIfOOOCompactionOccursAfterRetrievingQuerier(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 3 * DefaultBlockDuration + db := openTestDB(t, opts, nil) + defer func() { + require.NoError(t, db.Close()) + }() + + // Disable compactions so we can control it. + db.DisableCompactions() + + metric := labels.FromStrings(labels.MetricName, "test_metric") + ctx := context.Background() + interval := int64(15 * time.Second / time.Millisecond) + ts := int64(0) + samplesWritten := 0 + + // Capture the first timestamp - this will be the timestamp of the OOO sample we'll append below. + oooTS := ts + ts += interval + + // Push samples after the OOO sample we'll write below. + for ; ts < 10*interval; ts += interval { + app := db.Appender(ctx) + _, err := app.Append(0, metric, ts, float64(ts)) + require.NoError(t, err) + require.NoError(t, app.Commit()) + samplesWritten++ + } + + // Push a single OOO sample. 
+ app := db.Appender(ctx) + _, err := app.Append(0, metric, oooTS, float64(ts)) + require.NoError(t, err) + require.NoError(t, app.Commit()) + samplesWritten++ + + // Get a querier. + querierCreatedBeforeCompaction, err := db.ChunkQuerier(0, math.MaxInt64) + require.NoError(t, err) + + // Start OOO head compaction. + compactionComplete := atomic.NewBool(false) + go func() { + defer compactionComplete.Store(true) + + require.NoError(t, db.CompactOOOHead(ctx)) + require.Equal(t, float64(1), prom_testutil.ToFloat64(db.Head().metrics.chunksRemoved)) + }() + + // Give CompactOOOHead time to start work. + // If it does not wait for querierCreatedBeforeCompaction to be closed, then the query will return incorrect results or fail. + time.Sleep(time.Second) + require.False(t, compactionComplete.Load(), "compaction completed before reading chunks or closing querier created before compaction") + + // Get another querier. This one should only use the compacted blocks from disk and ignore the chunks that will be garbage collected. + querierCreatedAfterCompaction, err := db.ChunkQuerier(0, math.MaxInt64) + require.NoError(t, err) + + testQuerier := func(q storage.ChunkQuerier) { + // Query back the series. + hints := &storage.SelectHints{Start: 0, End: math.MaxInt64, Step: interval} + seriesSet := q.Select(ctx, true, hints, labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, "test_metric")) + + // Collect the iterator for the series. + var iterators []chunks.Iterator + for seriesSet.Next() { + iterators = append(iterators, seriesSet.At().Iterator(nil)) + } + require.NoError(t, seriesSet.Err()) + require.Len(t, iterators, 1) + iterator := iterators[0] + + // Check that we can still successfully read all samples. 
+ samplesRead := 0 + for iterator.Next() { + samplesRead += iterator.At().Chunk.NumSamples() + } + + require.NoError(t, iterator.Err()) + require.Equal(t, samplesWritten, samplesRead) + } + + testQuerier(querierCreatedBeforeCompaction) + + require.False(t, compactionComplete.Load(), "compaction completed before closing querier created before compaction") + require.NoError(t, querierCreatedBeforeCompaction.Close()) + require.Eventually(t, compactionComplete.Load, time.Second, 10*time.Millisecond, "compaction should complete after querier created before compaction was closed, and not wait for querier created after compaction") + + // Use the querier created after compaction and confirm it returns the expected results (ie. from the disk block created from OOO head and in-order head) without error. + testQuerier(querierCreatedAfterCompaction) + require.NoError(t, querierCreatedAfterCompaction.Close()) +} + +func TestQuerierShouldNotFailIfOOOCompactionOccursAfterSelecting(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 3 * DefaultBlockDuration + db := openTestDB(t, opts, nil) + defer func() { + require.NoError(t, db.Close()) + }() + + // Disable compactions so we can control it. + db.DisableCompactions() + + metric := labels.FromStrings(labels.MetricName, "test_metric") + ctx := context.Background() + interval := int64(15 * time.Second / time.Millisecond) + ts := int64(0) + samplesWritten := 0 + + // Capture the first timestamp - this will be the timestamp of the OOO sample we'll append below. + oooTS := ts + ts += interval + + // Push samples after the OOO sample we'll write below. + for ; ts < 10*interval; ts += interval { + app := db.Appender(ctx) + _, err := app.Append(0, metric, ts, float64(ts)) + require.NoError(t, err) + require.NoError(t, app.Commit()) + samplesWritten++ + } + + // Push a single OOO sample. 
+ app := db.Appender(ctx) + _, err := app.Append(0, metric, oooTS, float64(ts)) + require.NoError(t, err) + require.NoError(t, app.Commit()) + samplesWritten++ + + // Get a querier. + querier, err := db.ChunkQuerier(0, math.MaxInt64) + require.NoError(t, err) + + // Query back the series. + hints := &storage.SelectHints{Start: 0, End: math.MaxInt64, Step: interval} + seriesSet := querier.Select(ctx, true, hints, labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, "test_metric")) + + // Start OOO head compaction. + compactionComplete := atomic.NewBool(false) + go func() { + defer compactionComplete.Store(true) + + require.NoError(t, db.CompactOOOHead(ctx)) + require.Equal(t, float64(1), prom_testutil.ToFloat64(db.Head().metrics.chunksRemoved)) + }() + + // Give CompactOOOHead time to start work. + // If it does not wait for the querier to be closed, then the query will return incorrect results or fail. + time.Sleep(time.Second) + require.False(t, compactionComplete.Load(), "compaction completed before reading chunks or closing querier") + + // Collect the iterator for the series. + var iterators []chunks.Iterator + for seriesSet.Next() { + iterators = append(iterators, seriesSet.At().Iterator(nil)) + } + require.NoError(t, seriesSet.Err()) + require.Len(t, iterators, 1) + iterator := iterators[0] + + // Check that we can still successfully read all samples. 
+ samplesRead := 0 + for iterator.Next() { + samplesRead += iterator.At().Chunk.NumSamples() + } + + require.NoError(t, iterator.Err()) + require.Equal(t, samplesWritten, samplesRead) + + require.False(t, compactionComplete.Load(), "compaction completed before closing querier") + require.NoError(t, querier.Close()) + require.Eventually(t, compactionComplete.Load, time.Second, 10*time.Millisecond, "compaction should complete after querier was closed") +} + +func TestQuerierShouldNotFailIfOOOCompactionOccursAfterRetrievingIterators(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 3 * DefaultBlockDuration + db := openTestDB(t, opts, nil) + defer func() { + require.NoError(t, db.Close()) + }() + + // Disable compactions so we can control it. + db.DisableCompactions() + + metric := labels.FromStrings(labels.MetricName, "test_metric") + ctx := context.Background() + interval := int64(15 * time.Second / time.Millisecond) + ts := int64(0) + samplesWritten := 0 + + // Capture the first timestamp - this will be the timestamp of the OOO sample we'll append below. + oooTS := ts + ts += interval + + // Push samples after the OOO sample we'll write below. + for ; ts < 10*interval; ts += interval { + app := db.Appender(ctx) + _, err := app.Append(0, metric, ts, float64(ts)) + require.NoError(t, err) + require.NoError(t, app.Commit()) + samplesWritten++ + } + + // Push a single OOO sample. + app := db.Appender(ctx) + _, err := app.Append(0, metric, oooTS, float64(ts)) + require.NoError(t, err) + require.NoError(t, app.Commit()) + samplesWritten++ + + // Get a querier. + querier, err := db.ChunkQuerier(0, math.MaxInt64) + require.NoError(t, err) + + // Query back the series. + hints := &storage.SelectHints{Start: 0, End: math.MaxInt64, Step: interval} + seriesSet := querier.Select(ctx, true, hints, labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, "test_metric")) + + // Collect the iterator for the series. 
+ var iterators []chunks.Iterator + for seriesSet.Next() { + iterators = append(iterators, seriesSet.At().Iterator(nil)) + } + require.NoError(t, seriesSet.Err()) + require.Len(t, iterators, 1) + iterator := iterators[0] + + // Start OOO head compaction. + compactionComplete := atomic.NewBool(false) + go func() { + defer compactionComplete.Store(true) + + require.NoError(t, db.CompactOOOHead(ctx)) + require.Equal(t, float64(1), prom_testutil.ToFloat64(db.Head().metrics.chunksRemoved)) + }() + + // Give CompactOOOHead time to start work. + // If it does not wait for the querier to be closed, then the query will return incorrect results or fail. + time.Sleep(time.Second) + require.False(t, compactionComplete.Load(), "compaction completed before reading chunks or closing querier") + + // Check that we can still successfully read all samples. + samplesRead := 0 + for iterator.Next() { + samplesRead += iterator.At().Chunk.NumSamples() + } + + require.NoError(t, iterator.Err()) + require.Equal(t, samplesWritten, samplesRead) + + require.False(t, compactionComplete.Load(), "compaction completed before closing querier") + require.NoError(t, querier.Close()) + require.Eventually(t, compactionComplete.Load, time.Second, 10*time.Millisecond, "compaction should complete after querier was closed") +} + func newTestDB(t *testing.T) *DB { dir := t.TempDir() diff --git a/tsdb/head.go b/tsdb/head.go index 419340506d..bf181a4158 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -106,6 +106,8 @@ type Head struct { iso *isolation + oooIso *oooIsolation + cardinalityMutex sync.Mutex cardinalityCache *index.PostingsStats // Posting stats cache which will expire after 30sec. lastPostingsStatsCall time.Duration // Last posting stats call (PostingsCardinalityStats()) time for caching. 
@@ -300,6 +302,7 @@ func (h *Head) resetInMemoryState() error { } h.iso = newIsolation(h.opts.IsolationDisabled) + h.oooIso = newOOOIsolation() h.exemplarMetrics = em h.exemplars = es @@ -1133,6 +1136,14 @@ func (h *Head) WaitForPendingReadersInTimeRange(mint, maxt int64) { } } +// WaitForPendingReadersForOOOChunksAtOrBefore is like WaitForPendingReadersInTimeRange, except it waits for +// queries touching OOO chunks less than or equal to chunk to finish querying. +func (h *Head) WaitForPendingReadersForOOOChunksAtOrBefore(chunk chunks.ChunkDiskMapperRef) { + for h.oooIso.HasOpenReadsAtOrBefore(chunk) { + time.Sleep(500 * time.Millisecond) + } +} + // WaitForAppendersOverlapping waits for appends overlapping maxt to finish. func (h *Head) WaitForAppendersOverlapping(maxt int64) { for maxt >= h.iso.lowestAppendTime() { @@ -1271,13 +1282,19 @@ func (h *Head) truncateWAL(mint int64) error { } // truncateOOO +// - waits for any pending reads that potentially touch chunks less than or equal to newMinOOOMmapRef // - truncates the OOO WBL files whose index is strictly less than lastWBLFile. -// - garbage collects all the m-map chunks from the memory that are less than or equal to minOOOMmapRef +// - garbage collects all the m-map chunks from the memory that are less than or equal to newMinOOOMmapRef // and then deletes the series that do not have any data anymore. -func (h *Head) truncateOOO(lastWBLFile int, minOOOMmapRef chunks.ChunkDiskMapperRef) error { +// +// The caller is responsible for ensuring that no further queriers will be created that reference chunks less +// than or equal to newMinOOOMmapRef before calling truncateOOO. 
+func (h *Head) truncateOOO(lastWBLFile int, newMinOOOMmapRef chunks.ChunkDiskMapperRef) error { curMinOOOMmapRef := chunks.ChunkDiskMapperRef(h.minOOOMmapRef.Load()) - if minOOOMmapRef.GreaterThan(curMinOOOMmapRef) { - h.minOOOMmapRef.Store(uint64(minOOOMmapRef)) + if newMinOOOMmapRef.GreaterThan(curMinOOOMmapRef) { + h.WaitForPendingReadersForOOOChunksAtOrBefore(newMinOOOMmapRef) + h.minOOOMmapRef.Store(uint64(newMinOOOMmapRef)) + if err := h.truncateSeriesAndChunkDiskMapper("truncateOOO"); err != nil { return err } diff --git a/tsdb/ooo_head.go b/tsdb/ooo_head.go index 1251af4a97..7f2110fa65 100644 --- a/tsdb/ooo_head.go +++ b/tsdb/ooo_head.go @@ -20,6 +20,7 @@ import ( "github.com/oklog/ulid" "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/prometheus/prometheus/tsdb/chunks" "github.com/prometheus/prometheus/tsdb/tombstones" ) @@ -113,22 +114,27 @@ type OOORangeHead struct { // the timerange of the query and having preexisting pointers to the first // and last timestamp help with that. 
mint, maxt int64 + + isoState *oooIsolationState } -func NewOOORangeHead(head *Head, mint, maxt int64) *OOORangeHead { +func NewOOORangeHead(head *Head, mint, maxt int64, minRef chunks.ChunkDiskMapperRef) *OOORangeHead { + isoState := head.oooIso.TrackReadAfter(minRef) + return &OOORangeHead{ - head: head, - mint: mint, - maxt: maxt, + head: head, + mint: mint, + maxt: maxt, + isoState: isoState, } } func (oh *OOORangeHead) Index() (IndexReader, error) { - return NewOOOHeadIndexReader(oh.head, oh.mint, oh.maxt), nil + return NewOOOHeadIndexReader(oh.head, oh.mint, oh.maxt, oh.isoState.minRef), nil } func (oh *OOORangeHead) Chunks() (ChunkReader, error) { - return NewOOOHeadChunkReader(oh.head, oh.mint, oh.maxt), nil + return NewOOOHeadChunkReader(oh.head, oh.mint, oh.maxt, oh.isoState), nil } func (oh *OOORangeHead) Tombstones() (tombstones.Reader, error) { diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go index b9c2dc4a50..ace2326576 100644 --- a/tsdb/ooo_head_read.go +++ b/tsdb/ooo_head_read.go @@ -38,26 +38,29 @@ var _ IndexReader = &OOOHeadIndexReader{} // decided to do this to avoid code duplication. // The only methods that change are the ones about getting Series and Postings. type OOOHeadIndexReader struct { - *headIndexReader // A reference to the headIndexReader so we can reuse as many interface implementation as possible. + *headIndexReader // A reference to the headIndexReader so we can reuse as many interface implementation as possible. 
+	lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef
 }
 
-func NewOOOHeadIndexReader(head *Head, mint, maxt int64) *OOOHeadIndexReader {
+func NewOOOHeadIndexReader(head *Head, mint, maxt int64, lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef) *OOOHeadIndexReader {
 	hr := &headIndexReader{
 		head: head,
 		mint: mint,
 		maxt: maxt,
 	}
-	return &OOOHeadIndexReader{hr}
+	return &OOOHeadIndexReader{hr, lastGarbageCollectedMmapRef}
 }
 
 func (oh *OOOHeadIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error {
-	return oh.series(ref, builder, chks, 0)
+	return oh.series(ref, builder, chks, oh.lastGarbageCollectedMmapRef, 0)
 }
 
-// The passed lastMmapRef tells upto what max m-map chunk that we can consider.
-// If it is 0, it means all chunks need to be considered.
-// If it is non-0, then the oooHeadChunk must not be considered.
-func (oh *OOOHeadIndexReader) series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta, lastMmapRef chunks.ChunkDiskMapperRef) error {
+// lastGarbageCollectedMmapRef gives the last mmap chunk that may be being garbage collected and so
+// any chunk at or before this ref will not be considered. 0 disables this check.
+//
+// maxMmapRef tells up to which max m-map chunk we can consider. If it is non-0, then
+// the oooHeadChunk will not be considered.
+func (oh *OOOHeadIndexReader) series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef) error {
 	s := oh.head.series.getByID(chunks.HeadSeriesRef(ref))
 
 	if s == nil {
@@ -112,14 +115,14 @@ func (oh *OOOHeadIndexReader) series(ref storage.SeriesRef, builder *labels.Scra
 		// so we can set the correct markers.
if s.ooo.oooHeadChunk != nil { c := s.ooo.oooHeadChunk - if c.OverlapsClosedInterval(oh.mint, oh.maxt) && lastMmapRef == 0 { + if c.OverlapsClosedInterval(oh.mint, oh.maxt) && maxMmapRef == 0 { ref := chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.oooHeadChunkID(len(s.ooo.oooMmappedChunks)))) addChunk(c.minTime, c.maxTime, ref) } } for i := len(s.ooo.oooMmappedChunks) - 1; i >= 0; i-- { c := s.ooo.oooMmappedChunks[i] - if c.OverlapsClosedInterval(oh.mint, oh.maxt) && (lastMmapRef == 0 || lastMmapRef.GreaterThanOrEqualTo(c.ref)) { + if c.OverlapsClosedInterval(oh.mint, oh.maxt) && (maxMmapRef == 0 || maxMmapRef.GreaterThanOrEqualTo(c.ref)) && (lastGarbageCollectedMmapRef == 0 || c.ref.GreaterThan(lastGarbageCollectedMmapRef)) { ref := chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.oooHeadChunkID(i))) addChunk(c.minTime, c.maxTime, ref) } @@ -232,13 +235,15 @@ func (oh *OOOHeadIndexReader) Postings(ctx context.Context, name string, values type OOOHeadChunkReader struct { head *Head mint, maxt int64 + isoState *oooIsolationState } -func NewOOOHeadChunkReader(head *Head, mint, maxt int64) *OOOHeadChunkReader { +func NewOOOHeadChunkReader(head *Head, mint, maxt int64, isoState *oooIsolationState) *OOOHeadChunkReader { return &OOOHeadChunkReader{ - head: head, - mint: mint, - maxt: maxt, + head: head, + mint: mint, + maxt: maxt, + isoState: isoState, } } @@ -272,6 +277,9 @@ func (cr OOOHeadChunkReader) Chunk(meta chunks.Meta) (chunkenc.Chunk, error) { } func (cr OOOHeadChunkReader) Close() error { + if cr.isoState != nil { + cr.isoState.Close() + } return nil } @@ -306,7 +314,7 @@ func NewOOOCompactionHead(ctx context.Context, head *Head) (*OOOCompactionHead, ch.lastWBLFile = lastWBLFile } - ch.oooIR = NewOOOHeadIndexReader(head, math.MinInt64, math.MaxInt64) + ch.oooIR = NewOOOHeadIndexReader(head, math.MinInt64, math.MaxInt64, 0) n, v := index.AllPostingsKey() // TODO: verify this gets only ooo samples. 
@@ -365,7 +373,7 @@ func (ch *OOOCompactionHead) Index() (IndexReader, error) { } func (ch *OOOCompactionHead) Chunks() (ChunkReader, error) { - return NewOOOHeadChunkReader(ch.oooIR.head, ch.oooIR.mint, ch.oooIR.maxt), nil + return NewOOOHeadChunkReader(ch.oooIR.head, ch.oooIR.mint, ch.oooIR.maxt, nil), nil } func (ch *OOOCompactionHead) Tombstones() (tombstones.Reader, error) { @@ -391,7 +399,7 @@ func (ch *OOOCompactionHead) Meta() BlockMeta { // Only the method of BlockReader interface are valid for the cloned OOOCompactionHead. func (ch *OOOCompactionHead) CloneForTimeRange(mint, maxt int64) *OOOCompactionHead { return &OOOCompactionHead{ - oooIR: NewOOOHeadIndexReader(ch.oooIR.head, mint, maxt), + oooIR: NewOOOHeadIndexReader(ch.oooIR.head, mint, maxt, 0), lastMmapRef: ch.lastMmapRef, postings: ch.postings, chunkRange: ch.chunkRange, @@ -433,7 +441,7 @@ func (ir *OOOCompactionHeadIndexReader) SortedPostings(p index.Postings) index.P } func (ir *OOOCompactionHeadIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error { - return ir.ch.oooIR.series(ref, builder, chks, ir.ch.lastMmapRef) + return ir.ch.oooIR.series(ref, builder, chks, 0, ir.ch.lastMmapRef) } func (ir *OOOCompactionHeadIndexReader) SortedLabelValues(_ context.Context, name string, matchers ...*labels.Matcher) ([]string, error) { diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go index e74a7f9ded..3f4b9bae70 100644 --- a/tsdb/ooo_head_read_test.go +++ b/tsdb/ooo_head_read_test.go @@ -356,7 +356,7 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { }) } - ir := NewOOOHeadIndexReader(h, tc.queryMinT, tc.queryMaxT) + ir := NewOOOHeadIndexReader(h, tc.queryMinT, tc.queryMaxT, 0) var chks []chunks.Meta var b labels.ScratchBuilder @@ -437,7 +437,7 @@ func TestOOOHeadChunkReader_LabelValues(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { // We first want to test using a head index reader that covers the 
biggest query interval - oh := NewOOOHeadIndexReader(head, tc.queryMinT, tc.queryMaxT) + oh := NewOOOHeadIndexReader(head, tc.queryMinT, tc.queryMaxT, 0) matchers := []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")} values, err := oh.LabelValues(ctx, "foo", matchers...) sort.Strings(values) @@ -484,7 +484,8 @@ func TestOOOHeadChunkReader_Chunk(t *testing.T) { t.Run("Getting a non existing chunk fails with not found error", func(t *testing.T) { db := newTestDBWithOpts(t, opts) - cr := NewOOOHeadChunkReader(db.head, 0, 1000) + cr := NewOOOHeadChunkReader(db.head, 0, 1000, nil) + defer cr.Close() c, err := cr.Chunk(chunks.Meta{ Ref: 0x1000000, Chunk: chunkenc.Chunk(nil), MinTime: 100, MaxTime: 300, }) @@ -842,14 +843,15 @@ func TestOOOHeadChunkReader_Chunk(t *testing.T) { // The Series method is the one that populates the chunk meta OOO // markers like OOOLastRef. These are then used by the ChunkReader. - ir := NewOOOHeadIndexReader(db.head, tc.queryMinT, tc.queryMaxT) + ir := NewOOOHeadIndexReader(db.head, tc.queryMinT, tc.queryMaxT, 0) var chks []chunks.Meta var b labels.ScratchBuilder err := ir.Series(s1Ref, &b, &chks) require.NoError(t, err) require.Equal(t, len(tc.expChunksSamples), len(chks)) - cr := NewOOOHeadChunkReader(db.head, tc.queryMinT, tc.queryMaxT) + cr := NewOOOHeadChunkReader(db.head, tc.queryMinT, tc.queryMaxT, nil) + defer cr.Close() for i := 0; i < len(chks); i++ { c, err := cr.Chunk(chks[i]) require.NoError(t, err) @@ -1005,7 +1007,7 @@ func TestOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding( // The Series method is the one that populates the chunk meta OOO // markers like OOOLastRef. These are then used by the ChunkReader. 
- ir := NewOOOHeadIndexReader(db.head, tc.queryMinT, tc.queryMaxT) + ir := NewOOOHeadIndexReader(db.head, tc.queryMinT, tc.queryMaxT, 0) var chks []chunks.Meta var b labels.ScratchBuilder err := ir.Series(s1Ref, &b, &chks) @@ -1020,7 +1022,8 @@ func TestOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding( } require.NoError(t, app.Commit()) - cr := NewOOOHeadChunkReader(db.head, tc.queryMinT, tc.queryMaxT) + cr := NewOOOHeadChunkReader(db.head, tc.queryMinT, tc.queryMaxT, nil) + defer cr.Close() for i := 0; i < len(chks); i++ { c, err := cr.Chunk(chks[i]) require.NoError(t, err) diff --git a/tsdb/ooo_isolation.go b/tsdb/ooo_isolation.go new file mode 100644 index 0000000000..3e3e165a0a --- /dev/null +++ b/tsdb/ooo_isolation.go @@ -0,0 +1,79 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tsdb + +import ( + "container/list" + "sync" + + "github.com/prometheus/prometheus/tsdb/chunks" +) + +type oooIsolation struct { + mtx sync.RWMutex + openReads *list.List +} + +type oooIsolationState struct { + i *oooIsolation + e *list.Element + + minRef chunks.ChunkDiskMapperRef +} + +func newOOOIsolation() *oooIsolation { + return &oooIsolation{ + openReads: list.New(), + } +} + +// HasOpenReadsAtOrBefore returns true if this oooIsolation is aware of any reads that use +// chunks with reference at or before ref. 
+func (i *oooIsolation) HasOpenReadsAtOrBefore(ref chunks.ChunkDiskMapperRef) bool { + i.mtx.RLock() + defer i.mtx.RUnlock() + + for e := i.openReads.Front(); e != nil; e = e.Next() { + s := e.Value.(*oooIsolationState) + + if ref.GreaterThan(s.minRef) { + return true + } + } + + return false +} + +// TrackReadAfter records a read that uses chunks with reference after minRef. +// +// The caller must ensure that the returned oooIsolationState is eventually closed when +// the read is complete. +func (i *oooIsolation) TrackReadAfter(minRef chunks.ChunkDiskMapperRef) *oooIsolationState { + s := &oooIsolationState{ + i: i, + minRef: minRef, + } + + i.mtx.Lock() + s.e = i.openReads.PushBack(s) + i.mtx.Unlock() + + return s +} + +func (s oooIsolationState) Close() { + s.i.mtx.Lock() + s.i.openReads.Remove(s.e) + s.i.mtx.Unlock() +} diff --git a/tsdb/ooo_isolation_test.go b/tsdb/ooo_isolation_test.go new file mode 100644 index 0000000000..4ff0488ab1 --- /dev/null +++ b/tsdb/ooo_isolation_test.go @@ -0,0 +1,60 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tsdb + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestOOOIsolation(t *testing.T) { + i := newOOOIsolation() + + // Empty state shouldn't have any open reads. 
+ require.False(t, i.HasOpenReadsAtOrBefore(0)) + require.False(t, i.HasOpenReadsAtOrBefore(1)) + require.False(t, i.HasOpenReadsAtOrBefore(2)) + require.False(t, i.HasOpenReadsAtOrBefore(3)) + + // Add a read. + read1 := i.TrackReadAfter(1) + require.False(t, i.HasOpenReadsAtOrBefore(0)) + require.False(t, i.HasOpenReadsAtOrBefore(1)) + require.True(t, i.HasOpenReadsAtOrBefore(2)) + + // Add another overlapping read. + read2 := i.TrackReadAfter(0) + require.False(t, i.HasOpenReadsAtOrBefore(0)) + require.True(t, i.HasOpenReadsAtOrBefore(1)) + require.True(t, i.HasOpenReadsAtOrBefore(2)) + + // Close the second read, should now only report open reads for the first read's ref. + read2.Close() + require.False(t, i.HasOpenReadsAtOrBefore(0)) + require.False(t, i.HasOpenReadsAtOrBefore(1)) + require.True(t, i.HasOpenReadsAtOrBefore(2)) + + // Close the second read again: this should do nothing and ensures we can safely call Close() multiple times. + read2.Close() + require.False(t, i.HasOpenReadsAtOrBefore(0)) + require.False(t, i.HasOpenReadsAtOrBefore(1)) + require.True(t, i.HasOpenReadsAtOrBefore(2)) + + // Closing the first read should indicate no further open reads. + read1.Close() + require.False(t, i.HasOpenReadsAtOrBefore(0)) + require.False(t, i.HasOpenReadsAtOrBefore(1)) + require.False(t, i.HasOpenReadsAtOrBefore(2)) +} diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 3c27ab2f3c..7260d9d8bd 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -2803,7 +2803,7 @@ func BenchmarkQueries(b *testing.B) { qHead, err := NewBlockQuerier(NewRangeHead(head, 1, nSamples), 1, nSamples) require.NoError(b, err) - qOOOHead, err := NewBlockQuerier(NewOOORangeHead(head, 1, nSamples), 1, nSamples) + qOOOHead, err := NewBlockQuerier(NewOOORangeHead(head, 1, nSamples, 0), 1, nSamples) require.NoError(b, err) queryTypes = append(queryTypes, qt{