From fb6a45f06bbe474096162d33c6ac8afcbbb71f45 Mon Sep 17 00:00:00 2001
From: Arve Knudsen <arve.knudsen@gmail.com>
Date: Wed, 17 Jan 2024 18:28:06 +0100
Subject: [PATCH 01/51] tsdb/wlog: Only treat unknown record types as failure

Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
---
 CHANGELOG.md         |  1 +
 tsdb/wlog/watcher.go | 10 ++++------
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1dfcc5c333..e17124abe5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@
 * [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991
 * [BUGFIX] OTLP: Don't generate target_info unless at least one identifying label is defined. #13991
 * [BUGFIX] OTLP: Don't generate target_info unless there are metrics. #13991
+* [BUGFIX] tsdb/wlog.Watcher.readSegmentForGC: Only count unknown record types against record_decode_failures_total metric. #14042
 
 ## 2.52.0-rc.1 / 2024-05-03
 
diff --git a/tsdb/wlog/watcher.go b/tsdb/wlog/watcher.go
index 8ebd9249aa..fd4f5f20f2 100644
--- a/tsdb/wlog/watcher.go
+++ b/tsdb/wlog/watcher.go
@@ -685,14 +685,12 @@ func (w *Watcher) readSegmentForGC(r *LiveReader, segmentNum int, _ bool) error
 			}
 			w.writer.UpdateSeriesSegment(series, segmentNum)
 
-		// Ignore these; we're only interested in series.
-		case record.Samples:
-		case record.Exemplars:
-		case record.Tombstones:
-
-		default:
+		case record.Unknown:
 			// Could be corruption, or reading from a WAL from a newer Prometheus.
 			w.recordDecodeFailsMetric.Inc()
+
+		default:
+			// We're only interested in series.
 		}
 	}
 	if err := r.Err(); err != nil {

From 694f717dc44849592b439fce6ffa5fbcdf7957a6 Mon Sep 17 00:00:00 2001
From: Arve Knudsen <arve.knudsen@gmail.com>
Date: Tue, 28 May 2024 15:23:50 +0200
Subject: [PATCH 02/51] Watcher.readSegment: Only consider unknown rec types
 failures

Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
---
 tsdb/wlog/watcher.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tsdb/wlog/watcher.go b/tsdb/wlog/watcher.go
index fd4f5f20f2..5a73acdd49 100644
--- a/tsdb/wlog/watcher.go
+++ b/tsdb/wlog/watcher.go
@@ -625,6 +625,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error {
 				w.writer.AppendHistograms(histogramsToSend)
 				histogramsToSend = histogramsToSend[:0]
 			}
+
 		case record.FloatHistogramSamples:
 			// Skip if experimental "histograms over remote write" is not enabled.
 			if !w.sendHistograms {
@@ -652,11 +653,13 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error {
 				w.writer.AppendFloatHistograms(floatHistogramsToSend)
 				floatHistogramsToSend = floatHistogramsToSend[:0]
 			}
-		case record.Tombstones:
 
-		default:
+		case record.Unknown:
 			// Could be corruption, or reading from a WAL from a newer Prometheus.
 			w.recordDecodeFailsMetric.Inc()
+
+		default:
+			// We're not interested in other types of records.
 		}
 	}
 	if err := r.Err(); err != nil {

From 5ee94f49a22c9c9df338f7acc27044fe75f078af Mon Sep 17 00:00:00 2001
From: Charles Korn <charles.korn@grafana.com>
Date: Wed, 7 Aug 2024 15:30:01 +1000
Subject: [PATCH 03/51] Fix issue where `sum` over mixed exponential and custom
 buckets, or incompatible custom buckets, produces incorrect results

Signed-off-by: Charles Korn <charles.korn@grafana.com>
---
 promql/engine.go                              | 19 +++++++++----
 .../testdata/native_histograms.test           | 27 +++++++++++++++++++
 2 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/promql/engine.go b/promql/engine.go
index 14c3706062..efb6c583fb 100644
--- a/promql/engine.go
+++ b/promql/engine.go
@@ -2784,6 +2784,7 @@ type groupedAggregation struct {
 	seen              bool // Was this output groups seen in the input at this timestamp.
 	hasFloat          bool // Has at least 1 float64 sample aggregated.
 	hasHistogram      bool // Has at least 1 histogram sample aggregated.
+	abandonHistogram  bool // If true, group has seen mixed exponential and custom buckets, or incompatible custom buckets.
 	groupAggrComplete bool // Used by LIMITK to short-cut series loop when we've reached K elem on every group.
 	incrementalMean   bool // True after reverting to incremental calculation of the mean value.
 }
@@ -2809,10 +2810,11 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 		// Initialize this group if it's the first time we've seen it.
 		if !group.seen {
 			*group = groupedAggregation{
-				seen:       true,
-				floatValue: f,
-				floatMean:  f,
-				groupCount: 1,
+				seen:             true,
+				floatValue:       f,
+				floatMean:        f,
+				abandonHistogram: false,
+				groupCount:       1,
 			}
 			switch op {
 			case parser.AVG, parser.SUM:
@@ -2833,6 +2835,10 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 			continue
 		}
 
+		if group.abandonHistogram {
+			continue
+		}
+
 		switch op {
 		case parser.SUM:
 			if h != nil {
@@ -2841,6 +2847,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 					_, err := group.histogramValue.Add(h)
 					if err != nil {
 						handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos)
+						group.abandonHistogram = true
 					}
 				}
 				// Otherwise the aggregation contained floats
@@ -2987,7 +2994,9 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 				annos.Add(annotations.NewMixedFloatsHistogramsAggWarning(e.Expr.PositionRange()))
 				continue
 			}
-			if aggr.hasHistogram {
+			if aggr.abandonHistogram {
+				continue
+			} else if aggr.hasHistogram {
 				aggr.histogramValue.Compact(0)
 			} else {
 				aggr.floatValue += aggr.floatKahanC
diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test
index 034d73eb51..fc0517f75e 100644
--- a/promql/promqltest/testdata/native_histograms.test
+++ b/promql/promqltest/testdata/native_histograms.test
@@ -762,3 +762,30 @@ eval_warn instant at 30s rate(some_metric[30s])
 # Test the case where we have more than two points for rate
 eval_warn instant at 1m rate(some_metric[1m])
     {} {{count:0.03333333333333333 sum:0.03333333333333333 buckets:[0.03333333333333333]}}
+
+# Test mixing exponential and custom buckets.
+load 6m
+  metric{series="exponential"}         {{sum:4 count:3 buckets:[1 2 1]}}  _                                                                 {{sum:4 count:3 buckets:[1 2 1]}}
+  metric{series="other-exponential"}   {{sum:3 count:2 buckets:[1 1 1]}}  _                                                                 {{sum:3 count:2 buckets:[1 1 1]}}
+  metric{series="custom"}              _                                  {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}}     {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}}
+  metric{series="other-custom"}        _                                  {{schema:-53 sum:15 count:2 custom_values:[5 10] buckets:[0 2]}}  {{schema:-53 sum:15 count:2 custom_values:[5 10] buckets:[0 2]}}
+
+# T=0: only exponential
+# T=6: only custom
+# T=12: mixed, should be ignored and emit an warning
+eval_warn range from 0 to 12m step 6m sum(metric)
+  {} {{sum:7 count:5 buckets:[2 3 2]}} {{schema:-53 sum:16 count:3 custom_values:[5 10] buckets:[1 2]}} _
+
+clear
+
+# Test incompatible custom bucket schemas.
+load 6m
+  metric{series="1"} _                                                             {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}}
+  metric{series="2"} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}}    _                                                             {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}}
+  metric{series="3"} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}}
+
+# T=0: incompatible, should be ignored and emit a warning
+# T=6: compatible
+# T=12: incompatible followed by compatible, should be ignored and emit a warning
+eval_warn range from 0 to 12m step 6m sum(metric)
+  {} _ {{schema:-53 sum:2 count:2 custom_values:[5 10] buckets:[2]}} _

From f07b3ae67be5620cef9ff5520ba44d94f1216cb9 Mon Sep 17 00:00:00 2001
From: Charles Korn <charles.korn@grafana.com>
Date: Wed, 7 Aug 2024 15:32:35 +1000
Subject: [PATCH 04/51] Fix issue where `avg` over mixed exponential and custom
 buckets, or incompatible custom buckets, produces incorrect results or panics

Signed-off-by: Charles Korn <charles.korn@grafana.com>
---
 promql/engine.go                                  | 6 ++++++
 promql/promqltest/testdata/native_histograms.test | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/promql/engine.go b/promql/engine.go
index efb6c583fb..d51ed92c56 100644
--- a/promql/engine.go
+++ b/promql/engine.go
@@ -2868,10 +2868,14 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 					toAdd, err := left.Sub(right)
 					if err != nil {
 						handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos)
+						group.abandonHistogram = true
+						continue
 					}
 					_, err = group.histogramValue.Add(toAdd)
 					if err != nil {
 						handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos)
+						group.abandonHistogram = true
+						continue
 					}
 				}
 				// Otherwise the aggregation contained floats
@@ -2968,6 +2972,8 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 				continue
 			}
 			switch {
+			case aggr.abandonHistogram:
+				continue
 			case aggr.hasHistogram:
 				aggr.histogramValue = aggr.histogramValue.Compact(0)
 			case aggr.incrementalMean:
diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test
index fc0517f75e..62fac87c14 100644
--- a/promql/promqltest/testdata/native_histograms.test
+++ b/promql/promqltest/testdata/native_histograms.test
@@ -776,6 +776,9 @@ load 6m
 eval_warn range from 0 to 12m step 6m sum(metric)
   {} {{sum:7 count:5 buckets:[2 3 2]}} {{schema:-53 sum:16 count:3 custom_values:[5 10] buckets:[1 2]}} _
 
+eval_warn range from 0 to 12m step 6m avg(metric)
+  {} {{sum:3.5 count:2.5 buckets:[1 1.5 1]}} {{schema:-53 sum:8 count:1.5 custom_values:[5 10] buckets:[0.5 1]}} _
+
 clear
 
 # Test incompatible custom bucket schemas.
@@ -789,3 +792,6 @@ load 6m
 # T=12: incompatible followed by compatible, should be ignored and emit a warning
 eval_warn range from 0 to 12m step 6m sum(metric)
   {} _ {{schema:-53 sum:2 count:2 custom_values:[5 10] buckets:[2]}} _
+
+eval_warn range from 0 to 12m step 6m avg(metric)
+  {} _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} _

From 0f4bc87b4fde3b4d9483a62a6b4f8fe3286c84bd Mon Sep 17 00:00:00 2001
From: Charles Korn <charles.korn@grafana.com>
Date: Wed, 7 Aug 2024 15:35:06 +1000
Subject: [PATCH 05/51] Make linter happy

Signed-off-by: Charles Korn <charles.korn@grafana.com>
---
 promql/engine.go | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/promql/engine.go b/promql/engine.go
index d51ed92c56..6f0c64d420 100644
--- a/promql/engine.go
+++ b/promql/engine.go
@@ -3000,11 +3000,12 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 				annos.Add(annotations.NewMixedFloatsHistogramsAggWarning(e.Expr.PositionRange()))
 				continue
 			}
-			if aggr.abandonHistogram {
+			switch {
+			case aggr.abandonHistogram:
 				continue
-			} else if aggr.hasHistogram {
+			case aggr.hasHistogram:
 				aggr.histogramValue.Compact(0)
-			} else {
+			default:
 				aggr.floatValue += aggr.floatKahanC
 			}
 		default:

From 82bb35fabb609b9da87c6c15931917486ca8911a Mon Sep 17 00:00:00 2001
From: Charles Korn <charles.korn@grafana.com>
Date: Fri, 9 Aug 2024 13:51:31 +1000
Subject: [PATCH 06/51] Address PR feedback: fix typo and rename variable

Signed-off-by: Charles Korn <charles.korn@grafana.com>
---
 promql/engine.go                              | 34 +++++++++----------
 .../testdata/native_histograms.test           |  2 +-
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/promql/engine.go b/promql/engine.go
index 6f0c64d420..b20690a6d6 100644
--- a/promql/engine.go
+++ b/promql/engine.go
@@ -2781,12 +2781,12 @@ type groupedAggregation struct {
 	heap           vectorByValueHeap
 
 	// All bools together for better packing within the struct.
-	seen              bool // Was this output groups seen in the input at this timestamp.
-	hasFloat          bool // Has at least 1 float64 sample aggregated.
-	hasHistogram      bool // Has at least 1 histogram sample aggregated.
-	abandonHistogram  bool // If true, group has seen mixed exponential and custom buckets, or incompatible custom buckets.
-	groupAggrComplete bool // Used by LIMITK to short-cut series loop when we've reached K elem on every group.
-	incrementalMean   bool // True after reverting to incremental calculation of the mean value.
+	seen                   bool // Was this output groups seen in the input at this timestamp.
+	hasFloat               bool // Has at least 1 float64 sample aggregated.
+	hasHistogram           bool // Has at least 1 histogram sample aggregated.
+	incompatibleHistograms bool // If true, group has seen mixed exponential and custom buckets, or incompatible custom buckets.
+	groupAggrComplete      bool // Used by LIMITK to short-cut series loop when we've reached K elem on every group.
+	incrementalMean        bool // True after reverting to incremental calculation of the mean value.
 }
 
 // aggregation evaluates sum, avg, count, stdvar, stddev or quantile at one timestep on inputMatrix.
@@ -2810,11 +2810,11 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 		// Initialize this group if it's the first time we've seen it.
 		if !group.seen {
 			*group = groupedAggregation{
-				seen:             true,
-				floatValue:       f,
-				floatMean:        f,
-				abandonHistogram: false,
-				groupCount:       1,
+				seen:                   true,
+				floatValue:             f,
+				floatMean:              f,
+				incompatibleHistograms: false,
+				groupCount:             1,
 			}
 			switch op {
 			case parser.AVG, parser.SUM:
@@ -2835,7 +2835,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 			continue
 		}
 
-		if group.abandonHistogram {
+		if group.incompatibleHistograms {
 			continue
 		}
 
@@ -2847,7 +2847,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 					_, err := group.histogramValue.Add(h)
 					if err != nil {
 						handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos)
-						group.abandonHistogram = true
+						group.incompatibleHistograms = true
 					}
 				}
 				// Otherwise the aggregation contained floats
@@ -2868,13 +2868,13 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 					toAdd, err := left.Sub(right)
 					if err != nil {
 						handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos)
-						group.abandonHistogram = true
+						group.incompatibleHistograms = true
 						continue
 					}
 					_, err = group.histogramValue.Add(toAdd)
 					if err != nil {
 						handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos)
-						group.abandonHistogram = true
+						group.incompatibleHistograms = true
 						continue
 					}
 				}
@@ -2972,7 +2972,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 				continue
 			}
 			switch {
-			case aggr.abandonHistogram:
+			case aggr.incompatibleHistograms:
 				continue
 			case aggr.hasHistogram:
 				aggr.histogramValue = aggr.histogramValue.Compact(0)
@@ -3001,7 +3001,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
 				continue
 			}
 			switch {
-			case aggr.abandonHistogram:
+			case aggr.incompatibleHistograms:
 				continue
 			case aggr.hasHistogram:
 				aggr.histogramValue.Compact(0)
diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test
index 62fac87c14..09b02f6419 100644
--- a/promql/promqltest/testdata/native_histograms.test
+++ b/promql/promqltest/testdata/native_histograms.test
@@ -772,7 +772,7 @@ load 6m
 
 # T=0: only exponential
 # T=6: only custom
-# T=12: mixed, should be ignored and emit an warning
+# T=12: mixed, should be ignored and emit a warning
 eval_warn range from 0 to 12m step 6m sum(metric)
   {} {{sum:7 count:5 buckets:[2 3 2]}} {{schema:-53 sum:16 count:3 custom_values:[5 10] buckets:[1 2]}} _
 

From 5cfdde327c2176da01b7f418d3521e5682231340 Mon Sep 17 00:00:00 2001
From: Charles Korn <charles.korn@grafana.com>
Date: Fri, 9 Aug 2024 13:57:37 +1000
Subject: [PATCH 07/51] Address PR feedback: add extra test case

Signed-off-by: Charles Korn <charles.korn@grafana.com>
---
 .../testdata/native_histograms.test           | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test
index 09b02f6419..bb99afd476 100644
--- a/promql/promqltest/testdata/native_histograms.test
+++ b/promql/promqltest/testdata/native_histograms.test
@@ -795,3 +795,24 @@ eval_warn range from 0 to 12m step 6m sum(metric)
 
 eval_warn range from 0 to 12m step 6m avg(metric)
   {} _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} _
+
+clear
+
+load 1m
+  metric{group="just-floats", series="1"} 2
+  metric{group="just-floats", series="2"} 3
+  metric{group="just-exponential-histograms", series="1"} {{sum:3 count:4 buckets:[1 2 1]}}
+  metric{group="just-exponential-histograms", series="2"} {{sum:2 count:3 buckets:[1 1 1]}}
+  metric{group="just-custom-histograms", series="1"} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}}
+  metric{group="just-custom-histograms", series="2"} {{schema:-53 sum:3 count:4 custom_values:[2] buckets:[7]}}
+  metric{group="floats-and-histograms", series="1"} 2
+  metric{group="floats-and-histograms", series="2"} {{sum:2 count:3 buckets:[1 1 1]}}
+  metric{group="exponential-and-custom-histograms", series="1"} {{sum:2 count:3 buckets:[1 1 1]}}
+  metric{group="exponential-and-custom-histograms", series="2"} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}}
+  metric{group="incompatible-custom-histograms", series="1"} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}}
+  metric{group="incompatible-custom-histograms", series="2"} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}}
+
+eval_warn instant at 0 sum by (group) (metric)
+  {group="just-floats"} 5
+  {group="just-exponential-histograms"} {{sum:5 count:7 buckets:[2 3 2]}}
+  {group="just-custom-histograms"} {{schema:-53 sum:4 count:5 custom_values:[2] buckets:[8]}}

From 82f38d3e9a6b4eaa7a1ea804762c7da8ae44e930 Mon Sep 17 00:00:00 2001
From: machine424 <ayoubmrini424@gmail.com>
Date: Fri, 9 Aug 2024 14:53:40 +0200
Subject: [PATCH 08/51] fix(tsdb/db_test.go): close the corrupted chunk after
 creating it to satisfy Windows FS

Signed-off-by: machine424 <ayoubmrini424@gmail.com>
---
 tsdb/db_test.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tsdb/db_test.go b/tsdb/db_test.go
index cf41e25f27..5943489ffd 100644
--- a/tsdb/db_test.go
+++ b/tsdb/db_test.go
@@ -2690,8 +2690,9 @@ func TestDBReadOnly_Querier_NoAlteration(t *testing.T) {
 		require.NoError(t, db.Close())
 
 		// Simulate a corrupted chunk: without a header.
-		_, err := os.Create(path.Join(mmappedChunksDir(db.dir), "000001"))
+		chunk, err := os.Create(path.Join(mmappedChunksDir(db.dir), "000001"))
 		require.NoError(t, err)
+		require.NoError(t, chunk.Close())
 
 		spinUpQuerierAndCheck(db.dir, t.TempDir(), 1)
 

From 94ad489328623fd9a4df33e498138b48d5363c0c Mon Sep 17 00:00:00 2001
From: suntala <arati.rana@grafana.com>
Date: Sun, 11 Aug 2024 21:07:54 +0200
Subject: [PATCH 09/51] Fall back to comparing by label set when sorting by
 label

Co-authored-by: Aleks Fazlieva <britishrum@users.noreply.github.com>
Signed-off-by: suntala <arati.rana@grafana.com>
---
 promql/functions.go                       | 10 +++++-----
 promql/promqltest/testdata/functions.test | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/promql/functions.go b/promql/functions.go
index 018023bf02..353d4155a8 100644
--- a/promql/functions.go
+++ b/promql/functions.go
@@ -406,10 +406,10 @@ func funcSortDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel
 
 // === sort_by_label(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) ===
 func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
-	// In case the labels are the same, NaN should sort to the bottom, so take
-	// ascending sort with NaN first and reverse it.
-	var anno annotations.Annotations
-	vals[0], anno = funcSort(vals, args, enh)
+	slices.SortFunc(vals[0].(Vector), func(a, b Sample) int {
+		return labels.Compare(a.Metric, b.Metric)
+	})
+
 	labels := stringSliceFromArgs(args[1:])
 	slices.SortFunc(vals[0].(Vector), func(a, b Sample) int {
 		// Iterate over each given label
@@ -431,7 +431,7 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode
 		return 0
 	})
 
-	return vals[0].(Vector), anno
+	return vals[0].(Vector), nil
 }
 
 // === sort_by_label_desc(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) ===
diff --git a/promql/promqltest/testdata/functions.test b/promql/promqltest/testdata/functions.test
index b89d44fced..177afca353 100644
--- a/promql/promqltest/testdata/functions.test
+++ b/promql/promqltest/testdata/functions.test
@@ -523,16 +523,16 @@ load 5m
 	node_uname_info{job="node_exporter", instance="4m1000", release="1.111.3"} 0+10x10
 
 eval_ordered instant at 50m sort_by_label(http_requests, "instance")
-	http_requests{group="production", instance="0", job="api-server"} 100
 	http_requests{group="canary", instance="0", job="api-server"} 300
-	http_requests{group="production", instance="0", job="app-server"} 500
 	http_requests{group="canary", instance="0", job="app-server"} 700
-	http_requests{group="production", instance="1", job="api-server"} 200
+	http_requests{group="production", instance="0", job="api-server"} 100
+	http_requests{group="production", instance="0", job="app-server"} 500
 	http_requests{group="canary", instance="1", job="api-server"} 400
-	http_requests{group="production", instance="1", job="app-server"} 600
 	http_requests{group="canary", instance="1", job="app-server"} 800
-	http_requests{group="production", instance="2", job="api-server"} 100
+	http_requests{group="production", instance="1", job="api-server"} 200
+	http_requests{group="production", instance="1", job="app-server"} 600
 	http_requests{group="canary", instance="2", job="api-server"} NaN
+	http_requests{group="production", instance="2", job="api-server"} 100
 
 eval_ordered instant at 50m sort_by_label(http_requests, "instance", "group")
 	http_requests{group="canary", instance="0", job="api-server"} 300

From fd2f44af7fa48c5b57df51883b6f69c2e73a4809 Mon Sep 17 00:00:00 2001
From: suntala <arati.rana@grafana.com>
Date: Sun, 11 Aug 2024 21:24:09 +0200
Subject: [PATCH 10/51] Fall back to comparing by label set when sorting by
 label desc

Co-authored-by: Aleks Fazlieva <britishrum@users.noreply.github.com>
Signed-off-by: suntala <arati.rana@grafana.com>
---
 promql/functions.go                       | 10 +++++-----
 promql/promqltest/testdata/functions.test |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/promql/functions.go b/promql/functions.go
index 353d4155a8..e9bfe45f4a 100644
--- a/promql/functions.go
+++ b/promql/functions.go
@@ -436,10 +436,10 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode
 
 // === sort_by_label_desc(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) ===
 func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
-	// In case the labels are the same, NaN should sort to the bottom, so take
-	// ascending sort with NaN first and reverse it.
-	var anno annotations.Annotations
-	vals[0], anno = funcSortDesc(vals, args, enh)
+	slices.SortFunc(vals[0].(Vector), func(a, b Sample) int {
+		return labels.Compare(b.Metric, a.Metric)
+	})
+
 	labels := stringSliceFromArgs(args[1:])
 	slices.SortFunc(vals[0].(Vector), func(a, b Sample) int {
 		// Iterate over each given label
@@ -461,7 +461,7 @@ func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *Eval
 		return 0
 	})
 
-	return vals[0].(Vector), anno
+	return vals[0].(Vector), nil
 }
 
 // === clamp(Vector parser.ValueTypeVector, min, max Scalar) (Vector, Annotations) ===
diff --git a/promql/promqltest/testdata/functions.test b/promql/promqltest/testdata/functions.test
index 177afca353..6e2b3630bc 100644
--- a/promql/promqltest/testdata/functions.test
+++ b/promql/promqltest/testdata/functions.test
@@ -585,14 +585,14 @@ eval_ordered instant at 50m sort_by_label(http_requests, "job", "instance", "gro
 eval_ordered instant at 50m sort_by_label_desc(http_requests, "instance")
 	http_requests{group="production", instance="2", job="api-server"} 100
 	http_requests{group="canary", instance="2", job="api-server"} NaN
-	http_requests{group="canary", instance="1", job="app-server"} 800
 	http_requests{group="production", instance="1", job="app-server"} 600
-	http_requests{group="canary", instance="1", job="api-server"} 400
 	http_requests{group="production", instance="1", job="api-server"} 200
-	http_requests{group="canary", instance="0", job="app-server"} 700
+	http_requests{group="canary", instance="1", job="app-server"} 800
+	http_requests{group="canary", instance="1", job="api-server"} 400
 	http_requests{group="production", instance="0", job="app-server"} 500
-	http_requests{group="canary", instance="0", job="api-server"} 300
 	http_requests{group="production", instance="0", job="api-server"} 100
+	http_requests{group="canary", instance="0", job="app-server"} 700
+	http_requests{group="canary", instance="0", job="api-server"} 300
 
 eval_ordered instant at 50m sort_by_label_desc(http_requests, "instance", "group")
 	http_requests{group="production", instance="2", job="api-server"} 100

From 532904a1d6c34ec2e7064bc0ceb959a6b63acaf7 Mon Sep 17 00:00:00 2001
From: suntala <arati.rana@grafana.com>
Date: Sun, 11 Aug 2024 21:34:39 +0200
Subject: [PATCH 11/51] Document changes to sort by label

Co-authored-by: Aleks Fazlieva <britishrum@users.noreply.github.com>
Signed-off-by: suntala <arati.rana@grafana.com>
---
 docs/querying/functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/querying/functions.md b/docs/querying/functions.md
index ee81328b5e..951b90a68e 100644
--- a/docs/querying/functions.md
+++ b/docs/querying/functions.md
@@ -619,7 +619,7 @@ Like `sort`, `sort_desc` only affects the results of instant queries, as range q
 
 **This function has to be enabled via the [feature flag](../feature_flags.md) `--enable-feature=promql-experimental-functions`.**
 
-`sort_by_label(v instant-vector, label string, ...)` returns vector elements sorted by their label values and sample value in case of label values being equal, in ascending order.
+`sort_by_label(v instant-vector, label string, ...)` returns vector elements sorted by the values of the given labels in ascending order. In case these label values are equal, elements are sorted by their full label sets.
 
 Please note that the sort by label functions only affect the results of instant queries, as range query results always have a fixed output ordering.
 

From 7694c89497c72fd3b163fc101612b464a763b542 Mon Sep 17 00:00:00 2001
From: Lukasz Mierzwa <lukasz@cloudflare.com>
Date: Mon, 12 Aug 2024 14:01:20 +0100
Subject: [PATCH 12/51] Increase TestHangingNotifier timeout

This test keeps timing out on our arm64 CI server: it uses a very short timeout, and that 5ms doesn't seem to be enough.
Bump it 10x.

Signed-off-by: Lukasz Mierzwa <lukasz@cloudflare.com>
---
 notifier/notifier_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go
index 2cdaa9e06d..cf922a537c 100644
--- a/notifier/notifier_test.go
+++ b/notifier/notifier_test.go
@@ -711,7 +711,7 @@ func TestHangingNotifier(t *testing.T) {
 	)
 
 	var (
-		sendTimeout = 10 * time.Millisecond
+		sendTimeout = 100 * time.Millisecond
 		sdUpdatert  = sendTimeout / 2
 
 		done = make(chan struct{})

From 0c852680bf921036624f6672b7814ad380a99222 Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Sat, 29 Jun 2024 17:49:49 +0100
Subject: [PATCH 13/51] [Benchmark] TSDB: Add
 BenchmarkQuerierSelectWithOutOfOrder

Refactor existing BenchmarkQuerierSelect to provide the set-up.

Note that Head queries now run faster because they use a RangeHead.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/block.go              |   5 ++
 tsdb/querier_bench_test.go | 106 +++++++++++++++++++++++++------------
 2 files changed, 76 insertions(+), 35 deletions(-)

diff --git a/tsdb/block.go b/tsdb/block.go
index 2f32733f8c..c55e22ce51 100644
--- a/tsdb/block.go
+++ b/tsdb/block.go
@@ -467,6 +467,11 @@ func (pb *Block) setCompactionFailed() error {
 	return nil
 }
 
+// Querier implements Queryable.
+func (pb *Block) Querier(mint, maxt int64) (storage.Querier, error) {
+	return NewBlockQuerier(pb, mint, maxt)
+}
+
 type blockIndexReader struct {
 	ir IndexReader
 	b  *Block
diff --git a/tsdb/querier_bench_test.go b/tsdb/querier_bench_test.go
index 9a82302420..e3e457d07a 100644
--- a/tsdb/querier_bench_test.go
+++ b/tsdb/querier_bench_test.go
@@ -20,6 +20,7 @@ import (
 	"testing"
 
 	"github.com/prometheus/prometheus/model/labels"
+	"github.com/prometheus/prometheus/storage"
 	"github.com/prometheus/prometheus/tsdb/index"
 
 	"github.com/stretchr/testify/require"
@@ -254,56 +255,91 @@ func BenchmarkMergedStringIter(b *testing.B) {
 	b.ReportAllocs()
 }
 
-func BenchmarkQuerierSelect(b *testing.B) {
-	opts := DefaultHeadOptions()
-	opts.ChunkRange = 1000
-	opts.ChunkDirRoot = b.TempDir()
-	h, err := NewHead(nil, nil, nil, nil, opts, nil)
+func createHeadForBenchmarkSelect(b *testing.B, numSeries int, addSeries func(app storage.Appender, i int)) (*Head, *DB) {
+	dir := b.TempDir()
+	opts := DefaultOptions()
+	opts.OutOfOrderCapMax = 255
+	opts.OutOfOrderTimeWindow = 1000
+	db, err := Open(dir, nil, nil, opts, nil)
 	require.NoError(b, err)
-	defer h.Close()
+	b.Cleanup(func() {
+		require.NoError(b, db.Close())
+	})
+	h := db.Head()
+
 	app := h.Appender(context.Background())
-	numSeries := 1000000
 	for i := 0; i < numSeries; i++ {
-		app.Append(0, labels.FromStrings("foo", "bar", "i", fmt.Sprintf("%d%s", i, postingsBenchSuffix)), int64(i), 0)
+		addSeries(app, i)
 	}
 	require.NoError(b, app.Commit())
+	return h, db
+}
 
-	bench := func(b *testing.B, br BlockReader, sorted bool) {
-		matcher := labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")
-		for s := 1; s <= numSeries; s *= 10 {
-			b.Run(fmt.Sprintf("%dof%d", s, numSeries), func(b *testing.B) {
-				q, err := NewBlockQuerier(br, 0, int64(s-1))
-				require.NoError(b, err)
+func benchmarkSelect(b *testing.B, queryable storage.Queryable, numSeries int, sorted bool) {
+	matcher := labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")
+	b.ResetTimer()
+	for s := 1; s <= numSeries; s *= 10 {
+		b.Run(fmt.Sprintf("%dof%d", s, numSeries), func(b *testing.B) {
+			q, err := queryable.Querier(0, int64(s-1))
+			require.NoError(b, err)
 
-				b.ResetTimer()
-				for i := 0; i < b.N; i++ {
-					ss := q.Select(context.Background(), sorted, nil, matcher)
-					for ss.Next() {
-					}
-					require.NoError(b, ss.Err())
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				ss := q.Select(context.Background(), sorted, nil, matcher)
+				for ss.Next() {
 				}
-				q.Close()
-			})
-		}
+				require.NoError(b, ss.Err())
+			}
+			q.Close()
+		})
 	}
+}
+
+func BenchmarkQuerierSelect(b *testing.B) {
+	numSeries := 1000000
+	h, db := createHeadForBenchmarkSelect(b, numSeries, func(app storage.Appender, i int) {
+		_, err := app.Append(0, labels.FromStrings("foo", "bar", "i", fmt.Sprintf("%d%s", i, postingsBenchSuffix)), int64(i), 0)
+		if err != nil {
+			b.Fatal(err)
+		}
+	})
 
 	b.Run("Head", func(b *testing.B) {
-		bench(b, h, false)
+		benchmarkSelect(b, db, numSeries, false)
 	})
 	b.Run("SortedHead", func(b *testing.B) {
-		bench(b, h, true)
+		benchmarkSelect(b, db, numSeries, true)
 	})
 
-	tmpdir := b.TempDir()
-
-	blockdir := createBlockFromHead(b, tmpdir, h)
-	block, err := OpenBlock(nil, blockdir, nil)
-	require.NoError(b, err)
-	defer func() {
-		require.NoError(b, block.Close())
-	}()
-
 	b.Run("Block", func(b *testing.B) {
-		bench(b, block, false)
+		tmpdir := b.TempDir()
+
+		blockdir := createBlockFromHead(b, tmpdir, h)
+		block, err := OpenBlock(nil, blockdir, nil)
+		require.NoError(b, err)
+		defer func() {
+			require.NoError(b, block.Close())
+		}()
+
+		benchmarkSelect(b, block, numSeries, false)
+	})
+}
+
+func BenchmarkQuerierSelectWithOutOfOrder(b *testing.B) {
+	numSeries := 1000000
+	_, db := createHeadForBenchmarkSelect(b, numSeries, func(app storage.Appender, i int) {
+		l := labels.FromStrings("foo", "bar", "i", fmt.Sprintf("%d%s", i, postingsBenchSuffix))
+		ref, err := app.Append(0, l, int64(i+1), 0)
+		if err != nil {
+			b.Fatal(err)
+		}
+		_, err = app.Append(ref, l, int64(i), 1) // Out of order sample
+		if err != nil {
+			b.Fatal(err)
+		}
+	})
+
+	b.Run("Head", func(b *testing.B) {
+		benchmarkSelect(b, db, numSeries, false)
 	})
 }

From c75c8f8329758f82279d62b483a50c2fae00c283 Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Mon, 24 Jun 2024 21:06:50 +0100
Subject: [PATCH 14/51] Refactoring: extract getSeriesChunks

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/head_read.go | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tsdb/head_read.go b/tsdb/head_read.go
index c8b394be8a..ff9345fa00 100644
--- a/tsdb/head_read.go
+++ b/tsdb/head_read.go
@@ -200,9 +200,15 @@ func (h *headIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchB
 
 	*chks = (*chks)[:0]
 
+	getSeriesChunks(s, h.mint, h.maxt, chks)
+
+	return nil
+}
+
+func getSeriesChunks(s *memSeries, mint, maxt int64, chks *[]chunks.Meta) {
 	for i, c := range s.mmappedChunks {
 		// Do not expose chunks that are outside of the specified range.
-		if !c.OverlapsClosedInterval(h.mint, h.maxt) {
+		if !c.OverlapsClosedInterval(mint, maxt) {
 			continue
 		}
 		*chks = append(*chks, chunks.Meta{
@@ -223,7 +229,7 @@ func (h *headIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchB
 			} else {
 				maxTime = chk.maxTime
 			}
-			if chk.OverlapsClosedInterval(h.mint, h.maxt) {
+			if chk.OverlapsClosedInterval(mint, maxt) {
 				*chks = append(*chks, chunks.Meta{
 					MinTime: chk.minTime,
 					MaxTime: maxTime,
@@ -233,8 +239,6 @@ func (h *headIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchB
 			j++
 		}
 	}
-
-	return nil
 }
 
 // headChunkID returns the HeadChunkID referred to by the given position.

From a32aca0cd74d5d1acbef0abd58ec48f2a8e560c5 Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Thu, 27 Jun 2024 09:25:26 +0100
Subject: [PATCH 15/51] Refactoring: extract getOOOSeriesChunks

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/ooo_head_read.go | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index 9d5b9d6443..892d2c4b65 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -92,6 +92,10 @@ func (oh *OOOHeadIndexReader) series(ref storage.SeriesRef, builder *labels.Scra
 		return nil
 	}
 
+	return getOOOSeriesChunks(s, oh.mint, oh.maxt, lastGarbageCollectedMmapRef, maxMmapRef, chks)
+}
+
+func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef, chks *[]chunks.Meta) error {
 	tmpChks := make([]chunks.Meta, 0, len(s.ooo.oooMmappedChunks))
 
 	addChunk := func(minT, maxT int64, ref chunks.ChunkRef, chunk chunkenc.Chunk) {
@@ -106,7 +110,7 @@ func (oh *OOOHeadIndexReader) series(ref storage.SeriesRef, builder *labels.Scra
 	// Collect all chunks that overlap the query range.
 	if s.ooo.oooHeadChunk != nil {
 		c := s.ooo.oooHeadChunk
-		if c.OverlapsClosedInterval(oh.mint, oh.maxt) && maxMmapRef == 0 {
+		if c.OverlapsClosedInterval(mint, maxt) && maxMmapRef == 0 {
 			ref := chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.oooHeadChunkID(len(s.ooo.oooMmappedChunks))))
 			if len(c.chunk.samples) > 0 { // Empty samples happens in tests, at least.
 				chks, err := s.ooo.oooHeadChunk.chunk.ToEncodedChunks(c.minTime, c.maxTime)
@@ -125,7 +129,7 @@ func (oh *OOOHeadIndexReader) series(ref storage.SeriesRef, builder *labels.Scra
 	}
 	for i := len(s.ooo.oooMmappedChunks) - 1; i >= 0; i-- {
 		c := s.ooo.oooMmappedChunks[i]
-		if c.OverlapsClosedInterval(oh.mint, oh.maxt) && (maxMmapRef == 0 || maxMmapRef.GreaterThanOrEqualTo(c.ref)) && (lastGarbageCollectedMmapRef == 0 || c.ref.GreaterThan(lastGarbageCollectedMmapRef)) {
+		if c.OverlapsClosedInterval(mint, maxt) && (maxMmapRef == 0 || maxMmapRef.GreaterThanOrEqualTo(c.ref)) && (lastGarbageCollectedMmapRef == 0 || c.ref.GreaterThan(lastGarbageCollectedMmapRef)) {
 			ref := chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.oooHeadChunkID(i)))
 			addChunk(c.minTime, c.maxTime, ref, nil)
 		}

From 7e24844d081f82ef1d3933ace4477d60cec7d05b Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Wed, 26 Jun 2024 20:48:39 +0100
Subject: [PATCH 16/51] Refactor: extract headChunkReader.chunkFromSeries()

For when you have a series locked already.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/head_read.go | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tsdb/head_read.go b/tsdb/head_read.go
index ff9345fa00..d75d28a58d 100644
--- a/tsdb/head_read.go
+++ b/tsdb/head_read.go
@@ -362,9 +362,14 @@ func (h *headChunkReader) chunk(meta chunks.Meta, copyLastChunk bool) (chunkenc.
 	}
 
 	s.Lock()
+	defer s.Unlock()
+	return h.chunkFromSeries(s, cid, copyLastChunk)
+}
+
+// Call with s locked.
+func (h *headChunkReader) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID, copyLastChunk bool) (chunkenc.Chunk, int64, error) {
 	c, headChunk, isOpen, err := s.chunk(cid, h.head.chunkDiskMapper, &h.head.memChunkPool)
 	if err != nil {
-		s.Unlock()
 		return nil, 0, err
 	}
 	defer func() {
@@ -378,7 +383,6 @@ func (h *headChunkReader) chunk(meta chunks.Meta, copyLastChunk bool) (chunkenc.
 
 	// This means that the chunk is outside the specified range.
 	if !c.OverlapsClosedInterval(h.mint, h.maxt) {
-		s.Unlock()
 		return nil, 0, storage.ErrNotFound
 	}
 
@@ -395,7 +399,6 @@ func (h *headChunkReader) chunk(meta chunks.Meta, copyLastChunk bool) (chunkenc.
 			return nil, 0, err
 		}
 	}
-	s.Unlock()
 
 	return &safeHeadChunk{
 		Chunk:    chk,

From da31da3ea6f46da2b3c605d5a85c4d3fc80dd560 Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Thu, 27 Jun 2024 10:36:25 +0100
Subject: [PATCH 17/51] Refactor: extract selectSeriesSet and
 selectChunkSeriesSet

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/querier.go | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/tsdb/querier.go b/tsdb/querier.go
index 910c2d7fc1..37456d7e23 100644
--- a/tsdb/querier.go
+++ b/tsdb/querier.go
@@ -115,20 +115,24 @@ func NewBlockQuerier(b BlockReader, mint, maxt int64) (storage.Querier, error) {
 }
 
 func (q *blockQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, ms ...*labels.Matcher) storage.SeriesSet {
-	mint := q.mint
-	maxt := q.maxt
+	return selectSeriesSet(ctx, sortSeries, hints, ms, q.index, q.chunks, q.tombstones, q.mint, q.maxt)
+}
+
+func selectSeriesSet(ctx context.Context, sortSeries bool, hints *storage.SelectHints, ms []*labels.Matcher,
+	index IndexReader, chunks ChunkReader, tombstones tombstones.Reader, mint, maxt int64,
+) storage.SeriesSet {
 	disableTrimming := false
 	sharded := hints != nil && hints.ShardCount > 0
 
-	p, err := PostingsForMatchers(ctx, q.index, ms...)
+	p, err := PostingsForMatchers(ctx, index, ms...)
 	if err != nil {
 		return storage.ErrSeriesSet(err)
 	}
 	if sharded {
-		p = q.index.ShardedPostings(p, hints.ShardIndex, hints.ShardCount)
+		p = index.ShardedPostings(p, hints.ShardIndex, hints.ShardCount)
 	}
 	if sortSeries {
-		p = q.index.SortedPostings(p)
+		p = index.SortedPostings(p)
 	}
 
 	if hints != nil {
@@ -137,11 +141,11 @@ func (q *blockQuerier) Select(ctx context.Context, sortSeries bool, hints *stora
 		disableTrimming = hints.DisableTrimming
 		if hints.Func == "series" {
 			// When you're only looking up metadata (for example series API), you don't need to load any chunks.
-			return newBlockSeriesSet(q.index, newNopChunkReader(), q.tombstones, p, mint, maxt, disableTrimming)
+			return newBlockSeriesSet(index, newNopChunkReader(), tombstones, p, mint, maxt, disableTrimming)
 		}
 	}
 
-	return newBlockSeriesSet(q.index, q.chunks, q.tombstones, p, mint, maxt, disableTrimming)
+	return newBlockSeriesSet(index, chunks, tombstones, p, mint, maxt, disableTrimming)
 }
 
 // blockChunkQuerier provides chunk querying access to a single block database.
@@ -159,8 +163,12 @@ func NewBlockChunkQuerier(b BlockReader, mint, maxt int64) (storage.ChunkQuerier
 }
 
 func (q *blockChunkQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, ms ...*labels.Matcher) storage.ChunkSeriesSet {
-	mint := q.mint
-	maxt := q.maxt
+	return selectChunkSeriesSet(ctx, sortSeries, hints, ms, q.blockID, q.index, q.chunks, q.tombstones, q.mint, q.maxt)
+}
+
+func selectChunkSeriesSet(ctx context.Context, sortSeries bool, hints *storage.SelectHints, ms []*labels.Matcher,
+	blockID ulid.ULID, index IndexReader, chunks ChunkReader, tombstones tombstones.Reader, mint, maxt int64,
+) storage.ChunkSeriesSet {
 	disableTrimming := false
 	sharded := hints != nil && hints.ShardCount > 0
 
@@ -169,17 +177,17 @@ func (q *blockChunkQuerier) Select(ctx context.Context, sortSeries bool, hints *
 		maxt = hints.End
 		disableTrimming = hints.DisableTrimming
 	}
-	p, err := PostingsForMatchers(ctx, q.index, ms...)
+	p, err := PostingsForMatchers(ctx, index, ms...)
 	if err != nil {
 		return storage.ErrChunkSeriesSet(err)
 	}
 	if sharded {
-		p = q.index.ShardedPostings(p, hints.ShardIndex, hints.ShardCount)
+		p = index.ShardedPostings(p, hints.ShardIndex, hints.ShardCount)
 	}
 	if sortSeries {
-		p = q.index.SortedPostings(p)
+		p = index.SortedPostings(p)
 	}
-	return NewBlockChunkSeriesSet(q.blockID, q.index, q.chunks, q.tombstones, p, mint, maxt, disableTrimming)
+	return NewBlockChunkSeriesSet(blockID, index, chunks, tombstones, p, mint, maxt, disableTrimming)
 }
 
 // PostingsForMatchers assembles a single postings iterator against the index reader

From 2936ab80d7dbc0c944d99346ea7ab26449fe82d3 Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Thu, 27 Jun 2024 12:47:31 +0100
Subject: [PATCH 18/51] [Tests] Promtool: Sort output where Prometheus does not
 guarantee the order.

Previously this was working because out-of-order chunks forced a sort and merge.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 cmd/promtool/tsdb_test.go | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/cmd/promtool/tsdb_test.go b/cmd/promtool/tsdb_test.go
index 75089b168b..d7cc560881 100644
--- a/cmd/promtool/tsdb_test.go
+++ b/cmd/promtool/tsdb_test.go
@@ -20,6 +20,7 @@ import (
 	"math"
 	"os"
 	"runtime"
+	"slices"
 	"strings"
 	"testing"
 	"time"
@@ -152,12 +153,18 @@ func TestTSDBDump(t *testing.T) {
 			expectedMetrics, err := os.ReadFile(tt.expectedDump)
 			require.NoError(t, err)
 			expectedMetrics = normalizeNewLine(expectedMetrics)
-			// even though in case of one matcher samples are not sorted, the order in the cases above should stay the same.
-			require.Equal(t, string(expectedMetrics), dumpedMetrics)
+			// Sort both, because Prometheus does not guarantee the output order.
+			require.Equal(t, sortLines(string(expectedMetrics)), sortLines(dumpedMetrics))
 		})
 	}
 }
 
+func sortLines(buf string) string {
+	lines := strings.Split(buf, "\n")
+	slices.Sort(lines)
+	return strings.Join(lines, "\n")
+}
+
 func TestTSDBDumpOpenMetrics(t *testing.T) {
 	storage := promqltest.LoadedStorage(t, `
 		load 1m
@@ -169,7 +176,7 @@ func TestTSDBDumpOpenMetrics(t *testing.T) {
 	require.NoError(t, err)
 	expectedMetrics = normalizeNewLine(expectedMetrics)
 	dumpedMetrics := getDumpedSamples(t, storage.Dir(), math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
-	require.Equal(t, string(expectedMetrics), dumpedMetrics)
+	require.Equal(t, sortLines(string(expectedMetrics)), sortLines(dumpedMetrics))
 }
 
 func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) {

From e04d137649697ea59b0e5dbfad965ae24d6c0faa Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Mon, 24 Jun 2024 13:41:44 +0100
Subject: [PATCH 19/51] [PERF] TSDB: Query head and ooo-head together

Add `HeadAndOOOQuerier` which iterates just once over series, then
where necessary merges chunks from in-order and out-of-order lists.

Add a ChunkQuerier for in-order and ooo together

Add copy-last-chunk behaviour to HeadAndOOOChunkReader

Out-of-order chunk IDs are distinguished from in-order by setting bit 23.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/db.go                 |  58 +++++++-----
 tsdb/head_read.go          |  51 +++++++++--
 tsdb/ooo_head_read.go      | 182 ++++++++++++++++++++++++++++++++++++-
 tsdb/ooo_head_read_test.go |   2 +-
 tsdb/querier.go            |  12 ++-
 5 files changed, 263 insertions(+), 42 deletions(-)

diff --git a/tsdb/db.go b/tsdb/db.go
index 1c430c211a..bf1893ec07 100644
--- a/tsdb/db.go
+++ b/tsdb/db.go
@@ -2029,7 +2029,7 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) {
 		}
 	}
 
-	blockQueriers := make([]storage.Querier, 0, len(blocks)+2) // +2 to allow for possible in-order and OOO head queriers
+	blockQueriers := make([]storage.Querier, 0, len(blocks)+1) // +1 to allow for possible head querier.
 
 	defer func() {
 		if err != nil {
@@ -2041,10 +2041,11 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) {
 		}
 	}()
 
+	var headQuerier storage.Querier
 	if maxt >= db.head.MinTime() {
 		rh := NewRangeHead(db.head, mint, maxt)
 		var err error
-		inOrderHeadQuerier, err := db.blockQuerierFunc(rh, mint, maxt)
+		headQuerier, err = db.blockQuerierFunc(rh, mint, maxt)
 		if err != nil {
 			return nil, fmt.Errorf("open block querier for head %s: %w", rh, err)
 		}
@@ -2054,36 +2055,40 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) {
 		// won't run into a race later since any truncation that comes after will wait on this querier if it overlaps.
 		shouldClose, getNew, newMint := db.head.IsQuerierCollidingWithTruncation(mint, maxt)
 		if shouldClose {
-			if err := inOrderHeadQuerier.Close(); err != nil {
+			if err := headQuerier.Close(); err != nil {
 				return nil, fmt.Errorf("closing head block querier %s: %w", rh, err)
 			}
-			inOrderHeadQuerier = nil
+			headQuerier = nil
 		}
 		if getNew {
 			rh := NewRangeHead(db.head, newMint, maxt)
-			inOrderHeadQuerier, err = db.blockQuerierFunc(rh, newMint, maxt)
+			headQuerier, err = db.blockQuerierFunc(rh, newMint, maxt)
 			if err != nil {
 				return nil, fmt.Errorf("open block querier for head while getting new querier %s: %w", rh, err)
 			}
 		}
-
-		if inOrderHeadQuerier != nil {
-			blockQueriers = append(blockQueriers, inOrderHeadQuerier)
-		}
 	}
 
-	if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) {
+	if headQuerier != nil {
+		if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) {
+			// We need to fetch from in-order and out-of-order chunks: wrap the headQuerier.
+			isoState := db.head.oooIso.TrackReadAfter(db.lastGarbageCollectedMmapRef)
+			headQuerier = NewHeadAndOOOQuerier(mint, maxt, db.head, isoState, headQuerier)
+		}
+	} else if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) {
 		rh := NewOOORangeHead(db.head, mint, maxt, db.lastGarbageCollectedMmapRef)
 		var err error
-		outOfOrderHeadQuerier, err := db.blockQuerierFunc(rh, mint, maxt)
+		headQuerier, err = db.blockQuerierFunc(rh, mint, maxt)
 		if err != nil {
 			// If BlockQuerierFunc() failed, make sure to clean up the pending read created by NewOOORangeHead.
 			rh.isoState.Close()
 
 			return nil, fmt.Errorf("open block querier for ooo head %s: %w", rh, err)
 		}
+	}
 
-		blockQueriers = append(blockQueriers, outOfOrderHeadQuerier)
+	if headQuerier != nil {
+		blockQueriers = append(blockQueriers, headQuerier)
 	}
 
 	for _, b := range blocks {
@@ -2111,7 +2116,7 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer
 		}
 	}
 
-	blockQueriers := make([]storage.ChunkQuerier, 0, len(blocks)+2) // +2 to allow for possible in-order and OOO head queriers
+	blockQueriers := make([]storage.ChunkQuerier, 0, len(blocks)+1) // +1 to allow for possible head querier.
 
 	defer func() {
 		if err != nil {
@@ -2123,9 +2128,10 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer
 		}
 	}()
 
+	var headQuerier storage.ChunkQuerier
 	if maxt >= db.head.MinTime() {
 		rh := NewRangeHead(db.head, mint, maxt)
-		inOrderHeadQuerier, err := db.blockChunkQuerierFunc(rh, mint, maxt)
+		headQuerier, err = db.blockChunkQuerierFunc(rh, mint, maxt)
 		if err != nil {
 			return nil, fmt.Errorf("open querier for head %s: %w", rh, err)
 		}
@@ -2135,35 +2141,39 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer
 		// won't run into a race later since any truncation that comes after will wait on this querier if it overlaps.
 		shouldClose, getNew, newMint := db.head.IsQuerierCollidingWithTruncation(mint, maxt)
 		if shouldClose {
-			if err := inOrderHeadQuerier.Close(); err != nil {
+			if err := headQuerier.Close(); err != nil {
 				return nil, fmt.Errorf("closing head querier %s: %w", rh, err)
 			}
-			inOrderHeadQuerier = nil
+			headQuerier = nil
 		}
 		if getNew {
 			rh := NewRangeHead(db.head, newMint, maxt)
-			inOrderHeadQuerier, err = db.blockChunkQuerierFunc(rh, newMint, maxt)
+			headQuerier, err = db.blockChunkQuerierFunc(rh, newMint, maxt)
 			if err != nil {
 				return nil, fmt.Errorf("open querier for head while getting new querier %s: %w", rh, err)
 			}
 		}
-
-		if inOrderHeadQuerier != nil {
-			blockQueriers = append(blockQueriers, inOrderHeadQuerier)
-		}
 	}
 
-	if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) {
+	if headQuerier != nil {
+		if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) {
+			// We need to fetch from in-order and out-of-order chunks: wrap the headQuerier.
+			isoState := db.head.oooIso.TrackReadAfter(db.lastGarbageCollectedMmapRef)
+			headQuerier = NewHeadAndOOOChunkQuerier(mint, maxt, db.head, isoState, headQuerier)
+		}
+	} else if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) {
 		rh := NewOOORangeHead(db.head, mint, maxt, db.lastGarbageCollectedMmapRef)
-		outOfOrderHeadQuerier, err := db.blockChunkQuerierFunc(rh, mint, maxt)
+		headQuerier, err = db.blockChunkQuerierFunc(rh, mint, maxt)
 		if err != nil {
 			// If NewBlockQuerier() failed, make sure to clean up the pending read created by NewOOORangeHead.
 			rh.isoState.Close()
 
 			return nil, fmt.Errorf("open block chunk querier for ooo head %s: %w", rh, err)
 		}
+	}
 
-		blockQueriers = append(blockQueriers, outOfOrderHeadQuerier)
+	if headQuerier != nil {
+		blockQueriers = append(blockQueriers, headQuerier)
 	}
 
 	for _, b := range blocks {
diff --git a/tsdb/head_read.go b/tsdb/head_read.go
index d75d28a58d..977d6b978d 100644
--- a/tsdb/head_read.go
+++ b/tsdb/head_read.go
@@ -248,12 +248,20 @@ func (s *memSeries) headChunkID(pos int) chunks.HeadChunkID {
 	return chunks.HeadChunkID(pos) + s.firstChunkID
 }
 
+const oooChunkIDMask = 1 << 23
+
 // oooHeadChunkID returns the HeadChunkID referred to by the given position.
+// Only the bottom 24 bits are used. Bit 23 is always 1 for an OOO chunk; for the rest:
 // * 0 <= pos < len(s.oooMmappedChunks) refer to s.oooMmappedChunks[pos]
 // * pos == len(s.oooMmappedChunks) refers to s.oooHeadChunk
 // The caller must ensure that s.ooo is not nil.
 func (s *memSeries) oooHeadChunkID(pos int) chunks.HeadChunkID {
-	return chunks.HeadChunkID(pos) + s.ooo.firstOOOChunkID
+	return (chunks.HeadChunkID(pos) + s.ooo.firstOOOChunkID) | oooChunkIDMask
+}
+
+func unpackHeadChunkRef(ref chunks.ChunkRef) (chunks.HeadSeriesRef, chunks.HeadChunkID, bool) {
+	sid, cid := chunks.HeadChunkRef(ref).Unpack()
+	return sid, (cid & (oooChunkIDMask - 1)), (cid & oooChunkIDMask) != 0
 }
 
 // LabelValueFor returns label value for the given label name in the series referred to by ID.
@@ -343,10 +351,15 @@ func (h *headChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chu
 	return chk, nil, err
 }
 
-// ChunkWithCopy returns the chunk for the reference number.
-// If the chunk is the in-memory chunk, then it makes a copy and returns the copied chunk.
-func (h *headChunkReader) ChunkWithCopy(meta chunks.Meta) (chunkenc.Chunk, int64, error) {
-	return h.chunk(meta, true)
+type ChunkReaderWithCopy interface {
+	ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error)
+}
+
+// ChunkOrIterableWithCopy returns the chunk for the reference number.
+// If the chunk is the in-memory chunk, then it makes a copy and returns the copied chunk, plus the max time of the chunk.
+func (h *headChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) {
+	chk, maxTime, err := h.chunk(meta, true)
+	return chk, nil, maxTime, err
 }
 
 // chunk returns the chunk for the reference number.
@@ -472,10 +485,11 @@ func (s *memSeries) chunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDi
 // chunks.Meta reference from memory or by m-mapping it from the disk. The
 // returned iterable will be a merge of all the overlapping chunks, if any,
 // amongst all the chunks in the OOOHead.
+// If hr is non-nil then in-order chunks are included.
 // This function is not thread safe unless the caller holds a lock.
 // The caller must ensure that s.ooo is not nil.
-func (s *memSeries) oooMergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMapper, mint, maxt int64, maxMmapRef chunks.ChunkDiskMapperRef) (*mergedOOOChunks, error) {
-	_, cid := chunks.HeadChunkRef(meta.Ref).Unpack()
+func (s *memSeries) oooMergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMapper, hr *headChunkReader, mint, maxt int64, maxMmapRef chunks.ChunkDiskMapperRef) (*mergedOOOChunks, error) {
+	_, cid, _ := unpackHeadChunkRef(meta.Ref)
 
 	// ix represents the index of chunk in the s.mmappedChunks slice. The chunk meta's are
 	// incremented by 1 when new chunk is created, hence (meta - firstChunkID) gives the slice index.
@@ -516,6 +530,17 @@ func (s *memSeries) oooMergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMappe
 		tmpChks = append(tmpChks, chunkMetaAndChunkDiskMapperRef{meta: meta})
 	}
 
+	if hr != nil { // Include in-order chunks.
+		var metas []chunks.Meta
+		getSeriesChunks(s, max(meta.MinTime, mint), min(meta.MaxTime, maxt), &metas)
+		for _, m := range metas {
+			tmpChks = append(tmpChks, chunkMetaAndChunkDiskMapperRef{
+				meta: m,
+				ref:  0, // This tells the loop below it's an in-order head chunk.
+			})
+		}
+	}
+
 	// Next we want to sort all the collected chunks by min time so we can find
 	// those that overlap and stop when we know the rest don't.
 	slices.SortFunc(tmpChks, refLessByMinTimeAndMinRef)
@@ -527,9 +552,17 @@ func (s *memSeries) oooMergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMappe
 			continue
 		}
 		var iterable chunkenc.Iterable
-		if c.meta.Chunk != nil {
+		switch {
+		case c.meta.Chunk != nil:
 			iterable = c.meta.Chunk
-		} else {
+		case c.ref == 0: // This is an in-order head chunk.
+			_, cid := chunks.HeadChunkRef(c.meta.Ref).Unpack()
+			var err error
+			iterable, _, err = hr.chunkFromSeries(s, cid, false)
+			if err != nil {
+				return nil, fmt.Errorf("invalid head chunk: %w", err)
+			}
+		default:
 			chk, err := cdm.Chunk(c.ref)
 			if err != nil {
 				var cerr *chunks.CorruptionErr
diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index 892d2c4b65..b7944c56e1 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -27,6 +27,7 @@ import (
 	"github.com/prometheus/prometheus/tsdb/chunks"
 	"github.com/prometheus/prometheus/tsdb/index"
 	"github.com/prometheus/prometheus/tsdb/tombstones"
+	"github.com/prometheus/prometheus/util/annotations"
 )
 
 var _ IndexReader = &OOOHeadIndexReader{}
@@ -92,10 +93,10 @@ func (oh *OOOHeadIndexReader) series(ref storage.SeriesRef, builder *labels.Scra
 		return nil
 	}
 
-	return getOOOSeriesChunks(s, oh.mint, oh.maxt, lastGarbageCollectedMmapRef, maxMmapRef, chks)
+	return getOOOSeriesChunks(s, oh.mint, oh.maxt, lastGarbageCollectedMmapRef, maxMmapRef, false, chks)
 }
 
-func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef, chks *[]chunks.Meta) error {
+func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef, includeInOrder bool, chks *[]chunks.Meta) error {
 	tmpChks := make([]chunks.Meta, 0, len(s.ooo.oooMmappedChunks))
 
 	addChunk := func(minT, maxT int64, ref chunks.ChunkRef, chunk chunkenc.Chunk) {
@@ -135,6 +136,10 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap
 		}
 	}
 
+	if includeInOrder {
+		getSeriesChunks(s, mint, maxt, &tmpChks)
+	}
+
 	// There is nothing to do if we did not collect any chunk.
 	if len(tmpChks) == 0 {
 		return nil
@@ -275,7 +280,7 @@ func (cr OOOHeadChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk,
 		s.Unlock()
 		return nil, nil, storage.ErrNotFound
 	}
-	mc, err := s.oooMergedChunks(meta, cr.head.chunkDiskMapper, cr.mint, cr.maxt, cr.maxMmapRef)
+	mc, err := s.oooMergedChunks(meta, cr.head.chunkDiskMapper, nil, cr.mint, cr.maxt, cr.maxMmapRef)
 	s.Unlock()
 	if err != nil {
 		return nil, nil, err
@@ -498,3 +503,174 @@ func (ir *OOOCompactionHeadIndexReader) LabelNamesFor(ctx context.Context, posti
 func (ir *OOOCompactionHeadIndexReader) Close() error {
 	return ir.ch.oooIR.Close()
 }
+
+// HeadAndOOOQuerier queries both the head and the out-of-order head.
+type HeadAndOOOQuerier struct {
+	mint, maxt int64
+	head       *Head
+	index      IndexReader
+	chunkr     ChunkReader
+	querier    storage.Querier
+}
+
+func NewHeadAndOOOQuerier(mint, maxt int64, head *Head, oooIsoState *oooIsolationState, querier storage.Querier) storage.Querier {
+	isoState := head.iso.State(mint, maxt)
+	return &HeadAndOOOQuerier{
+		mint:    mint,
+		maxt:    maxt,
+		head:    head,
+		index:   NewHeadAndOOOIndexReader(head, mint, maxt, oooIsoState.minRef),
+		chunkr:  NewHeadAndOOOChunkReader(head, mint, maxt, isoState, oooIsoState, 0),
+		querier: querier,
+	}
+}
+
+func (q *HeadAndOOOQuerier) LabelValues(ctx context.Context, name string, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) {
+	return q.querier.LabelValues(ctx, name, hints, matchers...)
+}
+
+func (q *HeadAndOOOQuerier) LabelNames(ctx context.Context, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) {
+	return q.querier.LabelNames(ctx, hints, matchers...)
+}
+
+func (q *HeadAndOOOQuerier) Close() error {
+	q.chunkr.Close()
+	return q.querier.Close()
+}
+
+func (q *HeadAndOOOQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, matchers ...*labels.Matcher) storage.SeriesSet {
+	return selectSeriesSet(ctx, sortSeries, hints, matchers, q.index, q.chunkr, q.head.tombstones, q.mint, q.maxt)
+}
+
+// HeadAndOOOChunkQuerier queries both the head and the out-of-order head.
+type HeadAndOOOChunkQuerier struct {
+	mint, maxt int64
+	head       *Head
+	index      IndexReader
+	chunkr     ChunkReader
+	querier    storage.ChunkQuerier
+}
+
+func NewHeadAndOOOChunkQuerier(mint, maxt int64, head *Head, oooIsoState *oooIsolationState, querier storage.ChunkQuerier) storage.ChunkQuerier {
+	isoState := head.iso.State(mint, maxt)
+	return &HeadAndOOOChunkQuerier{
+		mint:    mint,
+		maxt:    maxt,
+		head:    head,
+		index:   NewHeadAndOOOIndexReader(head, mint, maxt, oooIsoState.minRef),
+		chunkr:  NewHeadAndOOOChunkReader(head, mint, maxt, isoState, oooIsoState, 0),
+		querier: querier,
+	}
+}
+
+func (q *HeadAndOOOChunkQuerier) LabelValues(ctx context.Context, name string, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) {
+	return q.querier.LabelValues(ctx, name, hints, matchers...)
+}
+
+func (q *HeadAndOOOChunkQuerier) LabelNames(ctx context.Context, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) {
+	return q.querier.LabelNames(ctx, hints, matchers...)
+}
+
+func (q *HeadAndOOOChunkQuerier) Close() error {
+	q.chunkr.Close()
+	return q.querier.Close()
+}
+
+func (q *HeadAndOOOChunkQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, matchers ...*labels.Matcher) storage.ChunkSeriesSet {
+	return selectChunkSeriesSet(ctx, sortSeries, hints, matchers, rangeHeadULID, q.index, q.chunkr, q.head.tombstones, q.mint, q.maxt)
+}
+
+type HeadAndOOOIndexReader struct {
+	*headIndexReader            // A reference to the headIndexReader so we can reuse as many interface implementation as possible.
+	lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef
+}
+
+func NewHeadAndOOOIndexReader(head *Head, mint, maxt int64, lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOIndexReader {
+	hr := &headIndexReader{
+		head: head,
+		mint: mint,
+		maxt: maxt,
+	}
+	return &HeadAndOOOIndexReader{hr, lastGarbageCollectedMmapRef}
+}
+
+func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error {
+	s := oh.head.series.getByID(chunks.HeadSeriesRef(ref))
+	if s == nil {
+		oh.head.metrics.seriesNotFound.Inc()
+		return storage.ErrNotFound
+	}
+	builder.Assign(s.lset)
+
+	if chks == nil {
+		return nil
+	}
+
+	s.Lock()
+	defer s.Unlock()
+	*chks = (*chks)[:0]
+
+	if s.ooo != nil {
+		return getOOOSeriesChunks(s, oh.mint, oh.maxt, oh.lastGarbageCollectedMmapRef, 0, true, chks)
+	}
+	getSeriesChunks(s, oh.mint, oh.maxt, chks)
+	return nil
+}
+
+type HeadAndOOOChunkReader struct {
+	cr          headChunkReader
+	maxMmapRef  chunks.ChunkDiskMapperRef
+	oooIsoState *oooIsolationState
+}
+
+func NewHeadAndOOOChunkReader(head *Head, mint, maxt int64, isoState *isolationState, oooIsoState *oooIsolationState, maxMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOChunkReader {
+	return &HeadAndOOOChunkReader{
+		cr: headChunkReader{
+			head:     head,
+			mint:     mint,
+			maxt:     maxt,
+			isoState: isoState,
+		},
+		maxMmapRef:  maxMmapRef,
+		oooIsoState: oooIsoState,
+	}
+}
+
+func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error) {
+	sid, _, isOOO := unpackHeadChunkRef(meta.Ref)
+	if !isOOO {
+		return cr.cr.ChunkOrIterable(meta)
+	}
+
+	s := cr.cr.head.series.getByID(sid)
+	// This means that the series has been garbage collected.
+	if s == nil {
+		return nil, nil, storage.ErrNotFound
+	}
+
+	s.Lock()
+	mc, err := s.oooMergedChunks(meta, cr.cr.head.chunkDiskMapper, &cr.cr, cr.cr.mint, cr.cr.maxt, cr.maxMmapRef)
+	s.Unlock()
+
+	return nil, mc, err
+}
+
+// Pass through special behaviour for current head chunk.
+func (cr *HeadAndOOOChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) {
+	_, _, isOOO := unpackHeadChunkRef(meta.Ref)
+	if !isOOO {
+		return cr.cr.ChunkOrIterableWithCopy(meta)
+	}
+	chk, iter, err := cr.ChunkOrIterable(meta)
+	return chk, iter, 0, err
+}
+
+func (cr *HeadAndOOOChunkReader) Close() error {
+	if cr.cr.isoState != nil {
+		cr.cr.isoState.Close()
+	}
+	if cr.oooIsoState != nil {
+		cr.oooIsoState.Close()
+	}
+	return nil
+}
diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go
index 8cc3f1dde6..08c5c4a3ee 100644
--- a/tsdb/ooo_head_read_test.go
+++ b/tsdb/ooo_head_read_test.go
@@ -316,7 +316,7 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 						// Ref to whatever Ref the chunk has, that we refer to by ID
 						for ref, c := range intervals {
 							if c.ID == e.ID {
-								meta.Ref = chunks.ChunkRef(chunks.NewHeadChunkRef(chunks.HeadSeriesRef(s1ID), chunks.HeadChunkID(ref)))
+								meta.Ref = chunks.ChunkRef(chunks.NewHeadChunkRef(chunks.HeadSeriesRef(s1ID), s1.oooHeadChunkID(ref)))
 								break
 							}
 						}
diff --git a/tsdb/querier.go b/tsdb/querier.go
index 37456d7e23..2e15f0b084 100644
--- a/tsdb/querier.go
+++ b/tsdb/querier.go
@@ -641,14 +641,16 @@ func (p *populateWithDelGenericSeriesIterator) next(copyHeadChunk bool) bool {
 		}
 	}
 
-	hcr, ok := p.cr.(*headChunkReader)
+	hcr, ok := p.cr.(ChunkReaderWithCopy)
 	var iterable chunkenc.Iterable
 	if ok && copyHeadChunk && len(p.bufIter.Intervals) == 0 {
-		// ChunkWithCopy will copy the head chunk.
+		// ChunkOrIterableWithCopy will copy the head chunk, if it can.
 		var maxt int64
-		p.currMeta.Chunk, maxt, p.err = hcr.ChunkWithCopy(p.currMeta)
-		// For the in-memory head chunk the index reader sets maxt as MaxInt64. We fix it here.
-		p.currMeta.MaxTime = maxt
+		p.currMeta.Chunk, iterable, maxt, p.err = hcr.ChunkOrIterableWithCopy(p.currMeta)
+		if p.currMeta.Chunk != nil {
+			// For the in-memory head chunk the index reader sets maxt as MaxInt64. We fix it here.
+			p.currMeta.MaxTime = maxt
+		}
 	} else {
 		p.currMeta.Chunk, iterable, p.err = p.cr.ChunkOrIterable(p.currMeta)
 	}

From 6529d6336cc277aefef78a595128a65a719e86a0 Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Mon, 15 Jul 2024 20:07:12 +0100
Subject: [PATCH 20/51] TSDB: NewHeadAndOOOChunkReader takes headChunkReader

This lets callers pass a nil headChunkReader so that only OOO chunks are read.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/ooo_head_read.go | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index b7944c56e1..4be4e9e18d 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -514,13 +514,18 @@ type HeadAndOOOQuerier struct {
 }
 
 func NewHeadAndOOOQuerier(mint, maxt int64, head *Head, oooIsoState *oooIsolationState, querier storage.Querier) storage.Querier {
-	isoState := head.iso.State(mint, maxt)
+	cr := &headChunkReader{
+		head:     head,
+		mint:     mint,
+		maxt:     maxt,
+		isoState: head.iso.State(mint, maxt),
+	}
 	return &HeadAndOOOQuerier{
 		mint:    mint,
 		maxt:    maxt,
 		head:    head,
 		index:   NewHeadAndOOOIndexReader(head, mint, maxt, oooIsoState.minRef),
-		chunkr:  NewHeadAndOOOChunkReader(head, mint, maxt, isoState, oooIsoState, 0),
+		chunkr:  NewHeadAndOOOChunkReader(head, mint, maxt, cr, oooIsoState, 0),
 		querier: querier,
 	}
 }
@@ -552,13 +557,18 @@ type HeadAndOOOChunkQuerier struct {
 }
 
 func NewHeadAndOOOChunkQuerier(mint, maxt int64, head *Head, oooIsoState *oooIsolationState, querier storage.ChunkQuerier) storage.ChunkQuerier {
-	isoState := head.iso.State(mint, maxt)
+	cr := &headChunkReader{
+		head:     head,
+		mint:     mint,
+		maxt:     maxt,
+		isoState: head.iso.State(mint, maxt),
+	}
 	return &HeadAndOOOChunkQuerier{
 		mint:    mint,
 		maxt:    maxt,
 		head:    head,
 		index:   NewHeadAndOOOIndexReader(head, mint, maxt, oooIsoState.minRef),
-		chunkr:  NewHeadAndOOOChunkReader(head, mint, maxt, isoState, oooIsoState, 0),
+		chunkr:  NewHeadAndOOOChunkReader(head, mint, maxt, cr, oooIsoState, 0),
 		querier: querier,
 	}
 }
@@ -618,19 +628,19 @@ func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.S
 }
 
 type HeadAndOOOChunkReader struct {
-	cr          headChunkReader
+	head        *Head
+	mint, maxt  int64
+	cr          *headChunkReader // If nil, only read OOO chunks.
 	maxMmapRef  chunks.ChunkDiskMapperRef
 	oooIsoState *oooIsolationState
 }
 
-func NewHeadAndOOOChunkReader(head *Head, mint, maxt int64, isoState *isolationState, oooIsoState *oooIsolationState, maxMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOChunkReader {
+func NewHeadAndOOOChunkReader(head *Head, mint, maxt int64, cr *headChunkReader, oooIsoState *oooIsolationState, maxMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOChunkReader {
 	return &HeadAndOOOChunkReader{
-		cr: headChunkReader{
-			head:     head,
-			mint:     mint,
-			maxt:     maxt,
-			isoState: isoState,
-		},
+		head:        head,
+		mint:        mint,
+		maxt:        maxt,
+		cr:          cr,
 		maxMmapRef:  maxMmapRef,
 		oooIsoState: oooIsoState,
 	}
@@ -642,14 +652,14 @@ func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chu
 		return cr.cr.ChunkOrIterable(meta)
 	}
 
-	s := cr.cr.head.series.getByID(sid)
+	s := cr.head.series.getByID(sid)
 	// This means that the series has been garbage collected.
 	if s == nil {
 		return nil, nil, storage.ErrNotFound
 	}
 
 	s.Lock()
-	mc, err := s.oooMergedChunks(meta, cr.cr.head.chunkDiskMapper, &cr.cr, cr.cr.mint, cr.cr.maxt, cr.maxMmapRef)
+	mc, err := s.oooMergedChunks(meta, cr.head.chunkDiskMapper, cr.cr, cr.mint, cr.maxt, cr.maxMmapRef)
 	s.Unlock()
 
 	return nil, mc, err
@@ -666,7 +676,7 @@ func (cr *HeadAndOOOChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chun
 }
 
 func (cr *HeadAndOOOChunkReader) Close() error {
-	if cr.cr.isoState != nil {
+	if cr.cr != nil && cr.cr.isoState != nil {
 		cr.cr.isoState.Close()
 	}
 	if cr.oooIsoState != nil {

From f26159794434d20c0ec3081d0bd080b37756cc60 Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Mon, 15 Jul 2024 20:56:55 +0100
Subject: [PATCH 21/51] TSDB: Fix up LabelValues to work for OOO-only head

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/ooo_head_read.go      | 14 ++++++++++++++
 tsdb/ooo_head_read_test.go | 10 +++++-----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index 4be4e9e18d..f844cfaca1 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -627,6 +627,20 @@ func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.S
 	return nil
 }
 
+// LabelValues needs to be overridden from the headIndexReader implementation
+// so we can return labels within either in-order range or ooo range.
+func (oh *HeadAndOOOIndexReader) LabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) {
+	if oh.maxt < oh.head.MinTime() && oh.maxt < oh.head.MinOOOTime() || oh.mint > oh.head.MaxTime() && oh.mint > oh.head.MaxOOOTime() {
+		return []string{}, nil
+	}
+
+	if len(matchers) == 0 {
+		return oh.head.postings.LabelValues(ctx, name), nil
+	}
+
+	return labelValuesWithMatchers(ctx, oh, name, matchers...)
+}
+
 type HeadAndOOOChunkReader struct {
 	head        *Head
 	mint, maxt  int64
diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go
index 08c5c4a3ee..b837b9e2fc 100644
--- a/tsdb/ooo_head_read_test.go
+++ b/tsdb/ooo_head_read_test.go
@@ -421,17 +421,17 @@ func testOOOHeadChunkReader_LabelValues(t *testing.T, scenario sampleTypeScenari
 			name:       "LabelValues calls with ooo head query range not overlapping out-of-order data",
 			queryMinT:  100,
 			queryMaxT:  100,
-			expValues1: []string{},
-			expValues2: []string{},
-			expValues3: []string{},
-			expValues4: []string{},
+			expValues1: []string{"bar1"},
+			expValues2: nil,
+			expValues3: []string{"bar1", "bar2"},
+			expValues4: []string{"bar1", "bar2"},
 		},
 	}
 
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			// We first want to test using a head index reader that covers the biggest query interval
-			oh := NewOOOHeadIndexReader(head, tc.queryMinT, tc.queryMaxT, 0)
+			oh := NewHeadAndOOOIndexReader(head, tc.queryMinT, tc.queryMaxT, 0)
 			matchers := []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")}
 			values, err := oh.LabelValues(ctx, "foo", matchers...)
 			sort.Strings(values)

From ce4eac859a3231eadcfb392e3c2d27d243c0e07f Mon Sep 17 00:00:00 2001
From: suntala <arati.rana@grafana.com>
Date: Thu, 8 Aug 2024 20:59:00 +0200
Subject: [PATCH 22/51] Link to specific feature flag entry

Signed-off-by: suntala <arati.rana@grafana.com>
---
 docs/querying/functions.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/querying/functions.md b/docs/querying/functions.md
index ee81328b5e..bf2701b881 100644
--- a/docs/querying/functions.md
+++ b/docs/querying/functions.md
@@ -617,7 +617,7 @@ Like `sort`, `sort_desc` only affects the results of instant queries, as range q
 
 ## `sort_by_label()`
 
-**This function has to be enabled via the [feature flag](../feature_flags.md) `--enable-feature=promql-experimental-functions`.**
+**This function has to be enabled via the [feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`.**
 
 `sort_by_label(v instant-vector, label string, ...)` returns vector elements sorted by their label values and sample value in case of label values being equal, in ascending order.
 
@@ -627,7 +627,7 @@ This function uses [natural sort order](https://en.wikipedia.org/wiki/Natural_so
 
 ## `sort_by_label_desc()`
 
-**This function has to be enabled via the [feature flag](../feature_flags.md) `--enable-feature=promql-experimental-functions`.**
+**This function has to be enabled via the [feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`.**
 
 Same as `sort_by_label`, but sorts in descending order.
 
@@ -676,7 +676,7 @@ over time and return an instant vector with per-series aggregation results:
 * `last_over_time(range-vector)`: the most recent point value in the specified interval.
 * `present_over_time(range-vector)`: the value 1 for any series in the specified interval.
 
-If the [feature flag](../feature_flags.md)
+If the [feature flag](../feature_flags.md#experimental-promql-functions)
 `--enable-feature=promql-experimental-functions` is set, the following
 additional functions are available:
 

From 0a2ff76881a82bd2751cd3f316494b9ab5621b07 Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Mon, 15 Jul 2024 18:17:48 +0100
Subject: [PATCH 23/51] TSDB tests: Fix up BenchmarkQueries

The benchmark was not working even on main. Some cases still return errors.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/querier_test.go | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go
index ffdf8dc028..50525f65f4 100644
--- a/tsdb/querier_test.go
+++ b/tsdb/querier_test.go
@@ -3169,12 +3169,11 @@ func BenchmarkQueries(b *testing.B) {
 
 					qHead, err := NewBlockQuerier(NewRangeHead(head, 1, nSamples), 1, nSamples)
 					require.NoError(b, err)
-					qOOOHead, err := NewBlockQuerier(NewOOORangeHead(head, 1, nSamples, 0), 1, nSamples)
-					require.NoError(b, err)
+					isoState := head.oooIso.TrackReadAfter(0)
+					qOOOHead := NewHeadAndOOOQuerier(1, nSamples, head, isoState, qHead)
 
 					queryTypes = append(queryTypes, qt{
-						fmt.Sprintf("_Head_oooPercent:%d", oooPercentage),
-						storage.NewMergeQuerier([]storage.Querier{qHead, qOOOHead}, nil, storage.ChainedSeriesMerge),
+						fmt.Sprintf("_Head_oooPercent:%d", oooPercentage), qOOOHead,
 					})
 				}
 

From e7e50a3afd285136366ebbb0270cce442df3c1b1 Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Mon, 15 Jul 2024 18:27:31 +0100
Subject: [PATCH 24/51] TSDB: Remove code for querying OOO-head only

Just query via `HeadAndOOOQuerier`, which will skip series where no
in-order chunks are in range.

Now we don't need `OOORangeHead`.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/db.go       | 45 ++++++++--------------------
 tsdb/ooo_head.go | 78 ------------------------------------------------
 2 files changed, 12 insertions(+), 111 deletions(-)

diff --git a/tsdb/db.go b/tsdb/db.go
index bf1893ec07..94c44161d4 100644
--- a/tsdb/db.go
+++ b/tsdb/db.go
@@ -2041,8 +2041,9 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) {
 		}
 	}()
 
+	overlapsOOO := overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime())
 	var headQuerier storage.Querier
-	if maxt >= db.head.MinTime() {
+	if maxt >= db.head.MinTime() || overlapsOOO {
 		rh := NewRangeHead(db.head, mint, maxt)
 		var err error
 		headQuerier, err = db.blockQuerierFunc(rh, mint, maxt)
@@ -2069,22 +2070,10 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) {
 		}
 	}
 
-	if headQuerier != nil {
-		if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) {
-			// We need to fetch from in-order and out-of-order chunks: wrap the headQuerier.
-			isoState := db.head.oooIso.TrackReadAfter(db.lastGarbageCollectedMmapRef)
-			headQuerier = NewHeadAndOOOQuerier(mint, maxt, db.head, isoState, headQuerier)
-		}
-	} else if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) {
-		rh := NewOOORangeHead(db.head, mint, maxt, db.lastGarbageCollectedMmapRef)
-		var err error
-		headQuerier, err = db.blockQuerierFunc(rh, mint, maxt)
-		if err != nil {
-			// If BlockQuerierFunc() failed, make sure to clean up the pending read created by NewOOORangeHead.
-			rh.isoState.Close()
-
-			return nil, fmt.Errorf("open block querier for ooo head %s: %w", rh, err)
-		}
+	if overlapsOOO {
+		// We need to fetch from in-order and out-of-order chunks: wrap the headQuerier.
+		isoState := db.head.oooIso.TrackReadAfter(db.lastGarbageCollectedMmapRef)
+		headQuerier = NewHeadAndOOOQuerier(mint, maxt, db.head, isoState, headQuerier)
 	}
 
 	if headQuerier != nil {
@@ -2128,8 +2117,9 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer
 		}
 	}()
 
+	overlapsOOO := overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime())
 	var headQuerier storage.ChunkQuerier
-	if maxt >= db.head.MinTime() {
+	if maxt >= db.head.MinTime() || overlapsOOO {
 		rh := NewRangeHead(db.head, mint, maxt)
 		headQuerier, err = db.blockChunkQuerierFunc(rh, mint, maxt)
 		if err != nil {
@@ -2155,21 +2145,10 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer
 		}
 	}
 
-	if headQuerier != nil {
-		if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) {
-			// We need to fetch from in-order and out-of-order chunks: wrap the headQuerier.
-			isoState := db.head.oooIso.TrackReadAfter(db.lastGarbageCollectedMmapRef)
-			headQuerier = NewHeadAndOOOChunkQuerier(mint, maxt, db.head, isoState, headQuerier)
-		}
-	} else if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) {
-		rh := NewOOORangeHead(db.head, mint, maxt, db.lastGarbageCollectedMmapRef)
-		headQuerier, err = db.blockChunkQuerierFunc(rh, mint, maxt)
-		if err != nil {
-			// If NewBlockQuerier() failed, make sure to clean up the pending read created by NewOOORangeHead.
-			rh.isoState.Close()
-
-			return nil, fmt.Errorf("open block chunk querier for ooo head %s: %w", rh, err)
-		}
+	if overlapsOOO {
+		// We need to fetch from in-order and out-of-order chunks: wrap the headQuerier.
+		isoState := db.head.oooIso.TrackReadAfter(db.lastGarbageCollectedMmapRef)
+		headQuerier = NewHeadAndOOOChunkQuerier(mint, maxt, db.head, isoState, headQuerier)
 	}
 
 	if headQuerier != nil {
diff --git a/tsdb/ooo_head.go b/tsdb/ooo_head.go
index 209b14673c..0ed9f36484 100644
--- a/tsdb/ooo_head.go
+++ b/tsdb/ooo_head.go
@@ -14,16 +14,10 @@
 package tsdb
 
 import (
-	"fmt"
 	"sort"
 
 	"github.com/prometheus/prometheus/model/histogram"
 	"github.com/prometheus/prometheus/tsdb/chunkenc"
-
-	"github.com/oklog/ulid"
-
-	"github.com/prometheus/prometheus/tsdb/chunks"
-	"github.com/prometheus/prometheus/tsdb/tombstones"
 )
 
 // OOOChunk maintains samples in time-ascending order.
@@ -171,75 +165,3 @@ func (o *OOOChunk) ToEncodedChunks(mint, maxt int64) (chks []memChunk, err error
 	}
 	return chks, nil
 }
-
-var _ BlockReader = &OOORangeHead{}
-
-// OOORangeHead allows querying Head out of order samples via BlockReader
-// interface implementation.
-type OOORangeHead struct {
-	head *Head
-	// mint and maxt are tracked because when a query is handled we only want
-	// the timerange of the query and having preexisting pointers to the first
-	// and last timestamp help with that.
-	mint, maxt int64
-
-	isoState *oooIsolationState
-}
-
-func NewOOORangeHead(head *Head, mint, maxt int64, minRef chunks.ChunkDiskMapperRef) *OOORangeHead {
-	isoState := head.oooIso.TrackReadAfter(minRef)
-
-	return &OOORangeHead{
-		head:     head,
-		mint:     mint,
-		maxt:     maxt,
-		isoState: isoState,
-	}
-}
-
-func (oh *OOORangeHead) Index() (IndexReader, error) {
-	return NewOOOHeadIndexReader(oh.head, oh.mint, oh.maxt, oh.isoState.minRef), nil
-}
-
-func (oh *OOORangeHead) Chunks() (ChunkReader, error) {
-	return NewOOOHeadChunkReader(oh.head, oh.mint, oh.maxt, oh.isoState, 0), nil
-}
-
-func (oh *OOORangeHead) Tombstones() (tombstones.Reader, error) {
-	// As stated in the design doc https://docs.google.com/document/d/1Kppm7qL9C-BJB1j6yb6-9ObG3AbdZnFUBYPNNWwDBYM/edit?usp=sharing
-	// Tombstones are not supported for out of order metrics.
-	return tombstones.NewMemTombstones(), nil
-}
-
-var oooRangeHeadULID = ulid.MustParse("0000000000XXXX000RANGEHEAD")
-
-func (oh *OOORangeHead) Meta() BlockMeta {
-	return BlockMeta{
-		MinTime: oh.mint,
-		MaxTime: oh.maxt,
-		ULID:    oooRangeHeadULID,
-		Stats: BlockStats{
-			NumSeries: oh.head.NumSeries(),
-		},
-	}
-}
-
-// Size returns the size taken by the Head block.
-func (oh *OOORangeHead) Size() int64 {
-	return oh.head.Size()
-}
-
-// String returns an human readable representation of the out of order range
-// head. It's important to keep this function in order to avoid the struct dump
-// when the head is stringified in errors or logs.
-func (oh *OOORangeHead) String() string {
-	return fmt.Sprintf("ooo range head (mint: %d, maxt: %d)", oh.MinTime(), oh.MaxTime())
-}
-
-func (oh *OOORangeHead) MinTime() int64 {
-	return oh.mint
-}
-
-func (oh *OOORangeHead) MaxTime() int64 {
-	return oh.maxt
-}

From a299c7b6d61cbbfc898962acb3e88430bd7e048e Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Mon, 15 Jul 2024 20:10:17 +0100
Subject: [PATCH 25/51] TSDB: Remove OOOHeadChunkReader

Use HeadAndOOOChunkReader instead.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/ooo_head_read.go      | 55 +-------------------------------------
 tsdb/ooo_head_read_test.go |  8 +++---
 2 files changed, 5 insertions(+), 58 deletions(-)

diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index f844cfaca1..01ba129866 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -248,59 +248,6 @@ func (oh *OOOHeadIndexReader) Postings(ctx context.Context, name string, values
 	}
 }
 
-type OOOHeadChunkReader struct {
-	head       *Head
-	mint, maxt int64
-	isoState   *oooIsolationState
-	maxMmapRef chunks.ChunkDiskMapperRef
-}
-
-func NewOOOHeadChunkReader(head *Head, mint, maxt int64, isoState *oooIsolationState, maxMmapRef chunks.ChunkDiskMapperRef) *OOOHeadChunkReader {
-	return &OOOHeadChunkReader{
-		head:       head,
-		mint:       mint,
-		maxt:       maxt,
-		isoState:   isoState,
-		maxMmapRef: maxMmapRef,
-	}
-}
-
-func (cr OOOHeadChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error) {
-	sid, _ := chunks.HeadChunkRef(meta.Ref).Unpack()
-
-	s := cr.head.series.getByID(sid)
-	// This means that the series has been garbage collected.
-	if s == nil {
-		return nil, nil, storage.ErrNotFound
-	}
-
-	s.Lock()
-	if s.ooo == nil {
-		// There is no OOO data for this series.
-		s.Unlock()
-		return nil, nil, storage.ErrNotFound
-	}
-	mc, err := s.oooMergedChunks(meta, cr.head.chunkDiskMapper, nil, cr.mint, cr.maxt, cr.maxMmapRef)
-	s.Unlock()
-	if err != nil {
-		return nil, nil, err
-	}
-
-	// This means that the query range did not overlap with the requested chunk.
-	if len(mc.chunkIterables) == 0 {
-		return nil, nil, storage.ErrNotFound
-	}
-
-	return nil, mc, nil
-}
-
-func (cr OOOHeadChunkReader) Close() error {
-	if cr.isoState != nil {
-		cr.isoState.Close()
-	}
-	return nil
-}
-
 type OOOCompactionHead struct {
 	oooIR       *OOOHeadIndexReader
 	lastMmapRef chunks.ChunkDiskMapperRef
@@ -397,7 +344,7 @@ func (ch *OOOCompactionHead) Index() (IndexReader, error) {
 }
 
 func (ch *OOOCompactionHead) Chunks() (ChunkReader, error) {
-	return NewOOOHeadChunkReader(ch.oooIR.head, ch.oooIR.mint, ch.oooIR.maxt, nil, ch.lastMmapRef), nil
+	return NewHeadAndOOOChunkReader(ch.oooIR.head, ch.oooIR.mint, ch.oooIR.maxt, nil, nil, ch.lastMmapRef), nil
 }
 
 func (ch *OOOCompactionHead) Tombstones() (tombstones.Reader, error) {
diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go
index b837b9e2fc..c0b130ffbb 100644
--- a/tsdb/ooo_head_read_test.go
+++ b/tsdb/ooo_head_read_test.go
@@ -481,10 +481,10 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) {
 	t.Run("Getting a non existing chunk fails with not found error", func(t *testing.T) {
 		db := newTestDBWithOpts(t, opts)
 
-		cr := NewOOOHeadChunkReader(db.head, 0, 1000, nil, 0)
+		cr := NewHeadAndOOOChunkReader(db.head, 0, 1000, nil, nil, 0)
 		defer cr.Close()
 		c, iterable, err := cr.ChunkOrIterable(chunks.Meta{
-			Ref: 0x1000000, Chunk: chunkenc.Chunk(nil), MinTime: 100, MaxTime: 300,
+			Ref: 0x1800000, Chunk: chunkenc.Chunk(nil), MinTime: 100, MaxTime: 300,
 		})
 		require.Nil(t, iterable)
 		require.Equal(t, err, fmt.Errorf("not found"))
@@ -839,7 +839,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) {
 			require.NoError(t, err)
 			require.Equal(t, len(tc.expChunksSamples), len(chks))
 
-			cr := NewOOOHeadChunkReader(db.head, tc.queryMinT, tc.queryMaxT, nil, 0)
+			cr := NewHeadAndOOOChunkReader(db.head, tc.queryMinT, tc.queryMaxT, nil, nil, 0)
 			defer cr.Close()
 			for i := 0; i < len(chks); i++ {
 				c, iterable, err := cr.ChunkOrIterable(chks[i])
@@ -1013,7 +1013,7 @@ func testOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding(
 			}
 			require.NoError(t, app.Commit())
 
-			cr := NewOOOHeadChunkReader(db.head, tc.queryMinT, tc.queryMaxT, nil, 0)
+			cr := NewHeadAndOOOChunkReader(db.head, tc.queryMinT, tc.queryMaxT, nil, nil, 0)
 			defer cr.Close()
 			for i := 0; i < len(chks); i++ {
 				c, iterable, err := cr.ChunkOrIterable(chks[i])

From 26b3de04387b38fc633ba2ce0931fdf65059086d Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Tue, 16 Jul 2024 13:56:22 +0100
Subject: [PATCH 26/51] TSDB: Remove OOOHeadIndexReader

Use headIndexReader instead.

OOOCompactionHeadIndexReader needs to be expanded slightly, because it previously delegated to OOOHeadIndexReader.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/ooo_head_read.go      | 127 ++++++++++---------------------------
 tsdb/ooo_head_read_test.go |   6 +-
 2 files changed, 35 insertions(+), 98 deletions(-)

diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index 01ba129866..aad1d2fa8f 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -30,19 +30,6 @@ import (
 	"github.com/prometheus/prometheus/util/annotations"
 )
 
-var _ IndexReader = &OOOHeadIndexReader{}
-
-// OOOHeadIndexReader implements IndexReader so ooo samples in the head can be
-// accessed.
-// It also has a reference to headIndexReader so we can leverage on its
-// IndexReader implementation for all the methods that remain the same. We
-// decided to do this to avoid code duplication.
-// The only methods that change are the ones about getting Series and Postings.
-type OOOHeadIndexReader struct {
-	*headIndexReader            // A reference to the headIndexReader so we can reuse as many interface implementation as possible.
-	lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef
-}
-
 var _ chunkenc.Iterable = &mergedOOOChunks{}
 
 // mergedOOOChunks holds the list of iterables for overlapping chunks.
@@ -54,48 +41,11 @@ func (o mergedOOOChunks) Iterator(iterator chunkenc.Iterator) chunkenc.Iterator
 	return storage.ChainSampleIteratorFromIterables(iterator, o.chunkIterables)
 }
 
-func NewOOOHeadIndexReader(head *Head, mint, maxt int64, lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef) *OOOHeadIndexReader {
-	hr := &headIndexReader{
-		head: head,
-		mint: mint,
-		maxt: maxt,
-	}
-	return &OOOHeadIndexReader{hr, lastGarbageCollectedMmapRef}
-}
-
-func (oh *OOOHeadIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error {
-	return oh.series(ref, builder, chks, oh.lastGarbageCollectedMmapRef, 0)
-}
-
 // lastGarbageCollectedMmapRef gives the last mmap chunk that may be being garbage collected and so
 // any chunk at or before this ref will not be considered. 0 disables this check.
 //
 // maxMmapRef tells upto what max m-map chunk that we can consider. If it is non-0, then
 // the oooHeadChunk will not be considered.
-func (oh *OOOHeadIndexReader) series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef) error {
-	s := oh.head.series.getByID(chunks.HeadSeriesRef(ref))
-
-	if s == nil {
-		oh.head.metrics.seriesNotFound.Inc()
-		return storage.ErrNotFound
-	}
-	builder.Assign(s.labels())
-
-	if chks == nil {
-		return nil
-	}
-
-	s.Lock()
-	defer s.Unlock()
-	*chks = (*chks)[:0]
-
-	if s.ooo == nil {
-		return nil
-	}
-
-	return getOOOSeriesChunks(s, oh.mint, oh.maxt, lastGarbageCollectedMmapRef, maxMmapRef, false, chks)
-}
-
 func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef, includeInOrder bool, chks *[]chunks.Meta) error {
 	tmpChks := make([]chunks.Meta, 0, len(s.ooo.oooMmappedChunks))
 
@@ -176,21 +126,6 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap
 	return nil
 }
 
-// LabelValues needs to be overridden from the headIndexReader implementation due
-// to the check that happens at the beginning where we make sure that the query
-// interval overlaps with the head minooot and maxooot.
-func (oh *OOOHeadIndexReader) LabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) {
-	if oh.maxt < oh.head.MinOOOTime() || oh.mint > oh.head.MaxOOOTime() {
-		return []string{}, nil
-	}
-
-	if len(matchers) == 0 {
-		return oh.head.postings.LabelValues(ctx, name), nil
-	}
-
-	return labelValuesWithMatchers(ctx, oh, name, matchers...)
-}
-
 type chunkMetaAndChunkDiskMapperRef struct {
 	meta chunks.Meta
 	ref  chunks.ChunkDiskMapperRef
@@ -232,24 +167,8 @@ func lessByMinTimeAndMinRef(a, b chunks.Meta) int {
 	}
 }
 
-func (oh *OOOHeadIndexReader) Postings(ctx context.Context, name string, values ...string) (index.Postings, error) {
-	switch len(values) {
-	case 0:
-		return index.EmptyPostings(), nil
-	case 1:
-		return oh.head.postings.Get(name, values[0]), nil // TODO(ganesh) Also call GetOOOPostings
-	default:
-		// TODO(ganesh) We want to only return postings for out of order series.
-		res := make([]index.Postings, 0, len(values))
-		for _, value := range values {
-			res = append(res, oh.head.postings.Get(name, value)) // TODO(ganesh) Also call GetOOOPostings
-		}
-		return index.Merge(ctx, res...), nil
-	}
-}
-
 type OOOCompactionHead struct {
-	oooIR       *OOOHeadIndexReader
+	head        *Head
 	lastMmapRef chunks.ChunkDiskMapperRef
 	lastWBLFile int
 	postings    []storage.SeriesRef
@@ -266,6 +185,7 @@ type OOOCompactionHead struct {
 // on the sample append latency. So call NewOOOCompactionHead only right before compaction.
 func NewOOOCompactionHead(ctx context.Context, head *Head) (*OOOCompactionHead, error) {
 	ch := &OOOCompactionHead{
+		head:       head,
 		chunkRange: head.chunkRange.Load(),
 		mint:       math.MaxInt64,
 		maxt:       math.MinInt64,
@@ -279,15 +199,14 @@ func NewOOOCompactionHead(ctx context.Context, head *Head) (*OOOCompactionHead,
 		ch.lastWBLFile = lastWBLFile
 	}
 
-	ch.oooIR = NewOOOHeadIndexReader(head, math.MinInt64, math.MaxInt64, 0)
+	hr := headIndexReader{head: head, mint: ch.mint, maxt: ch.maxt}
 	n, v := index.AllPostingsKey()
-
-	// TODO: verify this gets only ooo samples.
-	p, err := ch.oooIR.Postings(ctx, n, v)
+	// TODO: filter to series with OOO samples, before sorting.
+	p, err := hr.Postings(ctx, n, v)
 	if err != nil {
 		return nil, err
 	}
-	p = ch.oooIR.SortedPostings(p)
+	p = hr.SortedPostings(p)
 
 	var lastSeq, lastOff int
 	for p.Next() {
@@ -344,7 +263,7 @@ func (ch *OOOCompactionHead) Index() (IndexReader, error) {
 }
 
 func (ch *OOOCompactionHead) Chunks() (ChunkReader, error) {
-	return NewHeadAndOOOChunkReader(ch.oooIR.head, ch.oooIR.mint, ch.oooIR.maxt, nil, nil, ch.lastMmapRef), nil
+	return NewHeadAndOOOChunkReader(ch.head, ch.mint, ch.maxt, nil, nil, ch.lastMmapRef), nil
 }
 
 func (ch *OOOCompactionHead) Tombstones() (tombstones.Reader, error) {
@@ -370,12 +289,12 @@ func (ch *OOOCompactionHead) Meta() BlockMeta {
 // Only the method of BlockReader interface are valid for the cloned OOOCompactionHead.
 func (ch *OOOCompactionHead) CloneForTimeRange(mint, maxt int64) *OOOCompactionHead {
 	return &OOOCompactionHead{
-		oooIR:       NewOOOHeadIndexReader(ch.oooIR.head, mint, maxt, 0),
+		head:        ch.head,
 		lastMmapRef: ch.lastMmapRef,
 		postings:    ch.postings,
 		chunkRange:  ch.chunkRange,
-		mint:        ch.mint,
-		maxt:        ch.maxt,
+		mint:        mint,
+		maxt:        maxt,
 	}
 }
 
@@ -395,7 +314,8 @@ func NewOOOCompactionHeadIndexReader(ch *OOOCompactionHead) IndexReader {
 }
 
 func (ir *OOOCompactionHeadIndexReader) Symbols() index.StringIter {
-	return ir.ch.oooIR.Symbols()
+	hr := headIndexReader{head: ir.ch.head, mint: ir.ch.mint, maxt: ir.ch.maxt}
+	return hr.Symbols()
 }
 
 func (ir *OOOCompactionHeadIndexReader) Postings(_ context.Context, name string, values ...string) (index.Postings, error) {
@@ -416,11 +336,28 @@ func (ir *OOOCompactionHeadIndexReader) SortedPostings(p index.Postings) index.P
 }
 
 func (ir *OOOCompactionHeadIndexReader) ShardedPostings(p index.Postings, shardIndex, shardCount uint64) index.Postings {
-	return ir.ch.oooIR.ShardedPostings(p, shardIndex, shardCount)
+	hr := headIndexReader{head: ir.ch.head, mint: ir.ch.mint, maxt: ir.ch.maxt}
+	return hr.ShardedPostings(p, shardIndex, shardCount)
 }
 
 func (ir *OOOCompactionHeadIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error {
-	return ir.ch.oooIR.series(ref, builder, chks, 0, ir.ch.lastMmapRef)
+	s := ir.ch.head.series.getByID(chunks.HeadSeriesRef(ref))
+
+	if s == nil {
+		ir.ch.head.metrics.seriesNotFound.Inc()
+		return storage.ErrNotFound
+	}
+	builder.Assign(s.lset)
+
+	s.Lock()
+	defer s.Unlock()
+	*chks = (*chks)[:0]
+
+	if s.ooo == nil {
+		return nil
+	}
+
+	return getOOOSeriesChunks(s, ir.ch.mint, ir.ch.maxt, 0, ir.ch.lastMmapRef, false, chks)
 }
 
 func (ir *OOOCompactionHeadIndexReader) SortedLabelValues(_ context.Context, name string, matchers ...*labels.Matcher) ([]string, error) {
@@ -448,7 +385,7 @@ func (ir *OOOCompactionHeadIndexReader) LabelNamesFor(ctx context.Context, posti
 }
 
 func (ir *OOOCompactionHeadIndexReader) Close() error {
-	return ir.ch.oooIR.Close()
+	return nil
 }
 
 // HeadAndOOOQuerier queries both the head and the out-of-order head.
diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go
index c0b130ffbb..f71d497320 100644
--- a/tsdb/ooo_head_read_test.go
+++ b/tsdb/ooo_head_read_test.go
@@ -341,7 +341,7 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 						})
 					}
 
-					ir := NewOOOHeadIndexReader(h, tc.queryMinT, tc.queryMaxT, 0)
+					ir := NewHeadAndOOOIndexReader(h, tc.queryMinT, tc.queryMaxT, 0)
 
 					var chks []chunks.Meta
 					var b labels.ScratchBuilder
@@ -832,7 +832,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) {
 
 			// The Series method populates the chunk metas, taking a copy of the
 			// head OOO chunk if necessary. These are then used by the ChunkReader.
-			ir := NewOOOHeadIndexReader(db.head, tc.queryMinT, tc.queryMaxT, 0)
+			ir := NewHeadAndOOOIndexReader(db.head, tc.queryMinT, tc.queryMaxT, 0)
 			var chks []chunks.Meta
 			var b labels.ScratchBuilder
 			err = ir.Series(s1Ref, &b, &chks)
@@ -997,7 +997,7 @@ func testOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding(
 
 			// The Series method populates the chunk metas, taking a copy of the
 			// head OOO chunk if necessary. These are then used by the ChunkReader.
-			ir := NewOOOHeadIndexReader(db.head, tc.queryMinT, tc.queryMaxT, 0)
+			ir := NewHeadAndOOOIndexReader(db.head, tc.queryMinT, tc.queryMaxT, 0)
 			var chks []chunks.Meta
 			var b labels.ScratchBuilder
 			err = ir.Series(s1Ref, &b, &chks)

From e95607b2765bf9b0492342d08b07c3b5e31089bc Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Tue, 16 Jul 2024 14:18:55 +0100
Subject: [PATCH 27/51] TSDB: Lock round access to labels, where necessary

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/ooo_head_read.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index aad1d2fa8f..e1881aef86 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -347,7 +347,7 @@ func (ir *OOOCompactionHeadIndexReader) Series(ref storage.SeriesRef, builder *l
 		ir.ch.head.metrics.seriesNotFound.Inc()
 		return storage.ErrNotFound
 	}
-	builder.Assign(s.lset)
+	builder.Assign(s.labels())
 
 	s.Lock()
 	defer s.Unlock()
@@ -494,7 +494,7 @@ func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.S
 		oh.head.metrics.seriesNotFound.Inc()
 		return storage.ErrNotFound
 	}
-	builder.Assign(s.lset)
+	builder.Assign(s.labels())
 
 	if chks == nil {
 		return nil

From 7ffd3ca2807326b76d1c2c19dc769163a9280eed Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Tue, 16 Jul 2024 14:20:18 +0100
Subject: [PATCH 28/51] TSDB: Cosmetic: move HeadAndOOO implementations where
 old code was

This makes the diffs easier to follow.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/ooo_head_read.go | 221 +++++++++++++++++++++---------------------
 1 file changed, 112 insertions(+), 109 deletions(-)

diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index e1881aef86..aaaa249639 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -30,6 +30,13 @@ import (
 	"github.com/prometheus/prometheus/util/annotations"
 )
 
+var _ IndexReader = &HeadAndOOOIndexReader{}
+
+type HeadAndOOOIndexReader struct {
+	*headIndexReader            // A reference to the headIndexReader so we can reuse as many interface implementation as possible.
+	lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef
+}
+
 var _ chunkenc.Iterable = &mergedOOOChunks{}
 
 // mergedOOOChunks holds the list of iterables for overlapping chunks.
@@ -41,6 +48,39 @@ func (o mergedOOOChunks) Iterator(iterator chunkenc.Iterator) chunkenc.Iterator
 	return storage.ChainSampleIteratorFromIterables(iterator, o.chunkIterables)
 }
 
+func NewHeadAndOOOIndexReader(head *Head, mint, maxt int64, lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOIndexReader {
+	hr := &headIndexReader{
+		head: head,
+		mint: mint,
+		maxt: maxt,
+	}
+	return &HeadAndOOOIndexReader{hr, lastGarbageCollectedMmapRef}
+}
+
+func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error {
+	s := oh.head.series.getByID(chunks.HeadSeriesRef(ref))
+
+	if s == nil {
+		oh.head.metrics.seriesNotFound.Inc()
+		return storage.ErrNotFound
+	}
+	builder.Assign(s.labels())
+
+	if chks == nil {
+		return nil
+	}
+
+	s.Lock()
+	defer s.Unlock()
+	*chks = (*chks)[:0]
+
+	if s.ooo != nil {
+		return getOOOSeriesChunks(s, oh.mint, oh.maxt, oh.lastGarbageCollectedMmapRef, 0, true, chks)
+	}
+	getSeriesChunks(s, oh.mint, oh.maxt, chks)
+	return nil
+}
+
 // lastGarbageCollectedMmapRef gives the last mmap chunk that may be being garbage collected and so
 // any chunk at or before this ref will not be considered. 0 disables this check.
 //
@@ -126,6 +166,20 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap
 	return nil
 }
 
+// LabelValues needs to be overridden from the headIndexReader implementation
+// so we can return labels within either in-order range or ooo range.
+func (oh *HeadAndOOOIndexReader) LabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) {
+	if oh.maxt < oh.head.MinTime() && oh.maxt < oh.head.MinOOOTime() || oh.mint > oh.head.MaxTime() && oh.mint > oh.head.MaxOOOTime() {
+		return []string{}, nil
+	}
+
+	if len(matchers) == 0 {
+		return oh.head.postings.LabelValues(ctx, name), nil
+	}
+
+	return labelValuesWithMatchers(ctx, oh, name, matchers...)
+}
+
 type chunkMetaAndChunkDiskMapperRef struct {
 	meta chunks.Meta
 	ref  chunks.ChunkDiskMapperRef
@@ -167,6 +221,64 @@ func lessByMinTimeAndMinRef(a, b chunks.Meta) int {
 	}
 }
 
+type HeadAndOOOChunkReader struct {
+	head        *Head
+	mint, maxt  int64
+	cr          *headChunkReader // If nil, only read OOO chunks.
+	maxMmapRef  chunks.ChunkDiskMapperRef
+	oooIsoState *oooIsolationState
+}
+
+func NewHeadAndOOOChunkReader(head *Head, mint, maxt int64, cr *headChunkReader, oooIsoState *oooIsolationState, maxMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOChunkReader {
+	return &HeadAndOOOChunkReader{
+		head:        head,
+		mint:        mint,
+		maxt:        maxt,
+		cr:          cr,
+		maxMmapRef:  maxMmapRef,
+		oooIsoState: oooIsoState,
+	}
+}
+
+func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error) {
+	sid, _, isOOO := unpackHeadChunkRef(meta.Ref)
+	if !isOOO {
+		return cr.cr.ChunkOrIterable(meta)
+	}
+
+	s := cr.head.series.getByID(sid)
+	// This means that the series has been garbage collected.
+	if s == nil {
+		return nil, nil, storage.ErrNotFound
+	}
+
+	s.Lock()
+	mc, err := s.oooMergedChunks(meta, cr.head.chunkDiskMapper, cr.cr, cr.mint, cr.maxt, cr.maxMmapRef)
+	s.Unlock()
+
+	return nil, mc, err
+}
+
+// Pass through special behaviour for current head chunk.
+func (cr *HeadAndOOOChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) {
+	_, _, isOOO := unpackHeadChunkRef(meta.Ref)
+	if !isOOO {
+		return cr.cr.ChunkOrIterableWithCopy(meta)
+	}
+	chk, iter, err := cr.ChunkOrIterable(meta)
+	return chk, iter, 0, err
+}
+
+func (cr *HeadAndOOOChunkReader) Close() error {
+	if cr.cr != nil && cr.cr.isoState != nil {
+		cr.cr.isoState.Close()
+	}
+	if cr.oooIsoState != nil {
+		cr.oooIsoState.Close()
+	}
+	return nil
+}
+
 type OOOCompactionHead struct {
 	head        *Head
 	lastMmapRef chunks.ChunkDiskMapperRef
@@ -473,112 +585,3 @@ func (q *HeadAndOOOChunkQuerier) Close() error {
 func (q *HeadAndOOOChunkQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, matchers ...*labels.Matcher) storage.ChunkSeriesSet {
 	return selectChunkSeriesSet(ctx, sortSeries, hints, matchers, rangeHeadULID, q.index, q.chunkr, q.head.tombstones, q.mint, q.maxt)
 }
-
-type HeadAndOOOIndexReader struct {
-	*headIndexReader            // A reference to the headIndexReader so we can reuse as many interface implementation as possible.
-	lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef
-}
-
-func NewHeadAndOOOIndexReader(head *Head, mint, maxt int64, lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOIndexReader {
-	hr := &headIndexReader{
-		head: head,
-		mint: mint,
-		maxt: maxt,
-	}
-	return &HeadAndOOOIndexReader{hr, lastGarbageCollectedMmapRef}
-}
-
-func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error {
-	s := oh.head.series.getByID(chunks.HeadSeriesRef(ref))
-	if s == nil {
-		oh.head.metrics.seriesNotFound.Inc()
-		return storage.ErrNotFound
-	}
-	builder.Assign(s.labels())
-
-	if chks == nil {
-		return nil
-	}
-
-	s.Lock()
-	defer s.Unlock()
-	*chks = (*chks)[:0]
-
-	if s.ooo != nil {
-		return getOOOSeriesChunks(s, oh.mint, oh.maxt, oh.lastGarbageCollectedMmapRef, 0, true, chks)
-	}
-	getSeriesChunks(s, oh.mint, oh.maxt, chks)
-	return nil
-}
-
-// LabelValues needs to be overridden from the headIndexReader implementation
-// so we can return labels within either in-order range or ooo range.
-func (oh *HeadAndOOOIndexReader) LabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) {
-	if oh.maxt < oh.head.MinTime() && oh.maxt < oh.head.MinOOOTime() || oh.mint > oh.head.MaxTime() && oh.mint > oh.head.MaxOOOTime() {
-		return []string{}, nil
-	}
-
-	if len(matchers) == 0 {
-		return oh.head.postings.LabelValues(ctx, name), nil
-	}
-
-	return labelValuesWithMatchers(ctx, oh, name, matchers...)
-}
-
-type HeadAndOOOChunkReader struct {
-	head        *Head
-	mint, maxt  int64
-	cr          *headChunkReader // If nil, only read OOO chunks.
-	maxMmapRef  chunks.ChunkDiskMapperRef
-	oooIsoState *oooIsolationState
-}
-
-func NewHeadAndOOOChunkReader(head *Head, mint, maxt int64, cr *headChunkReader, oooIsoState *oooIsolationState, maxMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOChunkReader {
-	return &HeadAndOOOChunkReader{
-		head:        head,
-		mint:        mint,
-		maxt:        maxt,
-		cr:          cr,
-		maxMmapRef:  maxMmapRef,
-		oooIsoState: oooIsoState,
-	}
-}
-
-func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error) {
-	sid, _, isOOO := unpackHeadChunkRef(meta.Ref)
-	if !isOOO {
-		return cr.cr.ChunkOrIterable(meta)
-	}
-
-	s := cr.head.series.getByID(sid)
-	// This means that the series has been garbage collected.
-	if s == nil {
-		return nil, nil, storage.ErrNotFound
-	}
-
-	s.Lock()
-	mc, err := s.oooMergedChunks(meta, cr.head.chunkDiskMapper, cr.cr, cr.mint, cr.maxt, cr.maxMmapRef)
-	s.Unlock()
-
-	return nil, mc, err
-}
-
-// Pass through special behaviour for current head chunk.
-func (cr *HeadAndOOOChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) {
-	_, _, isOOO := unpackHeadChunkRef(meta.Ref)
-	if !isOOO {
-		return cr.cr.ChunkOrIterableWithCopy(meta)
-	}
-	chk, iter, err := cr.ChunkOrIterable(meta)
-	return chk, iter, 0, err
-}
-
-func (cr *HeadAndOOOChunkReader) Close() error {
-	if cr.cr != nil && cr.cr.isoState != nil {
-		cr.cr.isoState.Close()
-	}
-	if cr.oooIsoState != nil {
-		cr.oooIsoState.Close()
-	}
-	return nil
-}

From 9135da1e4f24850008493c3b27f866123c761bdb Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Mon, 12 Aug 2024 17:14:41 +0100
Subject: [PATCH 29/51] TSDB: Review feedback

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>

* Re-enable check in `createHeadWithOOOSamples` which wasn't really broken.
* Move code making `Block` into a `Queryable` into test file.
* Make `getSeriesChunks` return a slice (renamed `appendSeriesChunks`).
* Rename `oooMergedChunks` to `mergedChunks`.
* Improve comment on `ChunkOrIterableWithCopy`.
* Name return values from unpackHeadChunkRef.

Co-authored-by: Oleg Zaytsev <mail@olegzaytsev.com>
Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/block.go              |  5 -----
 tsdb/head_read.go          | 19 +++++++++----------
 tsdb/ooo_head_read.go      |  9 +++++----
 tsdb/querier_bench_test.go |  9 ++++++++-
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/tsdb/block.go b/tsdb/block.go
index c55e22ce51..2f32733f8c 100644
--- a/tsdb/block.go
+++ b/tsdb/block.go
@@ -467,11 +467,6 @@ func (pb *Block) setCompactionFailed() error {
 	return nil
 }
 
-// Querier implements Queryable.
-func (pb *Block) Querier(mint, maxt int64) (storage.Querier, error) {
-	return NewBlockQuerier(pb, mint, maxt)
-}
-
 type blockIndexReader struct {
 	ir IndexReader
 	b  *Block
diff --git a/tsdb/head_read.go b/tsdb/head_read.go
index 977d6b978d..47f12df994 100644
--- a/tsdb/head_read.go
+++ b/tsdb/head_read.go
@@ -199,19 +199,18 @@ func (h *headIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchB
 	defer s.Unlock()
 
 	*chks = (*chks)[:0]
-
-	getSeriesChunks(s, h.mint, h.maxt, chks)
+	*chks = appendSeriesChunks(s, h.mint, h.maxt, *chks)
 
 	return nil
 }
 
-func getSeriesChunks(s *memSeries, mint, maxt int64, chks *[]chunks.Meta) {
+func appendSeriesChunks(s *memSeries, mint, maxt int64, chks []chunks.Meta) []chunks.Meta {
 	for i, c := range s.mmappedChunks {
 		// Do not expose chunks that are outside of the specified range.
 		if !c.OverlapsClosedInterval(mint, maxt) {
 			continue
 		}
-		*chks = append(*chks, chunks.Meta{
+		chks = append(chks, chunks.Meta{
 			MinTime: c.minTime,
 			MaxTime: c.maxTime,
 			Ref:     chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.headChunkID(i))),
@@ -230,7 +229,7 @@ func getSeriesChunks(s *memSeries, mint, maxt int64, chks *[]chunks.Meta) {
 				maxTime = chk.maxTime
 			}
 			if chk.OverlapsClosedInterval(mint, maxt) {
-				*chks = append(*chks, chunks.Meta{
+				chks = append(chks, chunks.Meta{
 					MinTime: chk.minTime,
 					MaxTime: maxTime,
 					Ref:     chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.headChunkID(len(s.mmappedChunks)+j))),
@@ -239,6 +238,7 @@ func getSeriesChunks(s *memSeries, mint, maxt int64, chks *[]chunks.Meta) {
 			j++
 		}
 	}
+	return chks
 }
 
 // headChunkID returns the HeadChunkID referred to by the given position.
@@ -259,7 +259,7 @@ func (s *memSeries) oooHeadChunkID(pos int) chunks.HeadChunkID {
 	return (chunks.HeadChunkID(pos) + s.ooo.firstOOOChunkID) | oooChunkIDMask
 }
 
-func unpackHeadChunkRef(ref chunks.ChunkRef) (chunks.HeadSeriesRef, chunks.HeadChunkID, bool) {
+func unpackHeadChunkRef(ref chunks.ChunkRef) (seriesID chunks.HeadSeriesRef, chunkID chunks.HeadChunkID, isOOO bool) {
 	sid, cid := chunks.HeadChunkRef(ref).Unpack()
 	return sid, (cid & (oooChunkIDMask - 1)), (cid & oooChunkIDMask) != 0
 }
@@ -481,14 +481,14 @@ func (s *memSeries) chunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDi
 	return elem, true, offset == 0, nil
 }
 
-// oooMergedChunks return an iterable over one or more OOO chunks for the given
+// mergedChunks returns an iterable over one or more OOO chunks for the given
 // chunks.Meta reference from memory or by m-mapping it from the disk. The
 // returned iterable will be a merge of all the overlapping chunks, if any,
 // amongst all the chunks in the OOOHead.
 // If hr is non-nil then in-order chunks are included.
 // This function is not thread safe unless the caller holds a lock.
 // The caller must ensure that s.ooo is not nil.
-func (s *memSeries) oooMergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMapper, hr *headChunkReader, mint, maxt int64, maxMmapRef chunks.ChunkDiskMapperRef) (*mergedOOOChunks, error) {
+func (s *memSeries) mergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMapper, hr *headChunkReader, mint, maxt int64, maxMmapRef chunks.ChunkDiskMapperRef) (chunkenc.Iterable, error) {
 	_, cid, _ := unpackHeadChunkRef(meta.Ref)
 
 	// ix represents the index of chunk in the s.mmappedChunks slice. The chunk meta's are
@@ -531,8 +531,7 @@ func (s *memSeries) oooMergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMappe
 	}
 
 	if hr != nil { // Include in-order chunks.
-		var metas []chunks.Meta
-		getSeriesChunks(s, max(meta.MinTime, mint), min(meta.MaxTime, maxt), &metas)
+		metas := appendSeriesChunks(s, max(meta.MinTime, mint), min(meta.MaxTime, maxt), nil)
 		for _, m := range metas {
 			tmpChks = append(tmpChks, chunkMetaAndChunkDiskMapperRef{
 				meta: m,
diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index aaaa249639..47e2efb866 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -77,7 +77,7 @@ func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.S
 	if s.ooo != nil {
 		return getOOOSeriesChunks(s, oh.mint, oh.maxt, oh.lastGarbageCollectedMmapRef, 0, true, chks)
 	}
-	getSeriesChunks(s, oh.mint, oh.maxt, chks)
+	*chks = appendSeriesChunks(s, oh.mint, oh.maxt, *chks)
 	return nil
 }
 
@@ -127,7 +127,7 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap
 	}
 
 	if includeInOrder {
-		getSeriesChunks(s, mint, maxt, &tmpChks)
+		tmpChks = appendSeriesChunks(s, mint, maxt, tmpChks)
 	}
 
 	// There is nothing to do if we did not collect any chunk.
@@ -253,13 +253,14 @@ func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chu
 	}
 
 	s.Lock()
-	mc, err := s.oooMergedChunks(meta, cr.head.chunkDiskMapper, cr.cr, cr.mint, cr.maxt, cr.maxMmapRef)
+	mc, err := s.mergedChunks(meta, cr.head.chunkDiskMapper, cr.cr, cr.mint, cr.maxt, cr.maxMmapRef)
 	s.Unlock()
 
 	return nil, mc, err
 }
 
-// Pass through special behaviour for current head chunk.
+// ChunkOrIterableWithCopy implements ChunkReaderWithCopy. The special Copy behaviour
+// is only implemented for the in-order head chunk.
 func (cr *HeadAndOOOChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) {
 	_, _, isOOO := unpackHeadChunkRef(meta.Ref)
 	if !isOOO {
diff --git a/tsdb/querier_bench_test.go b/tsdb/querier_bench_test.go
index e3e457d07a..43accc253b 100644
--- a/tsdb/querier_bench_test.go
+++ b/tsdb/querier_bench_test.go
@@ -321,10 +321,17 @@ func BenchmarkQuerierSelect(b *testing.B) {
 			require.NoError(b, block.Close())
 		}()
 
-		benchmarkSelect(b, block, numSeries, false)
+		benchmarkSelect(b, (*queryableBlock)(block), numSeries, false)
 	})
 }
 
+// Type wrapper to let a Block be a Queryable in benchmarkSelect().
+type queryableBlock Block
+
+func (pb *queryableBlock) Querier(mint, maxt int64) (storage.Querier, error) {
+	return NewBlockQuerier((*Block)(pb), mint, maxt)
+}
+
 func BenchmarkQuerierSelectWithOutOfOrder(b *testing.B) {
 	numSeries := 1000000
 	_, db := createHeadForBenchmarkSelect(b, numSeries, func(app storage.Appender, i int) {

From 512c67ec26e764e7adb4d2746ecf71d2222701f5 Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Mon, 12 Aug 2024 18:49:00 +0100
Subject: [PATCH 30/51] TSDB: Never go over maximum number of OOO chunks

In `mmapCurrentOOOHeadChunk`, check if the number is at the maximum and
drop the data with an error log. This is not expected to happen as the
maximum is over 8 million; that's 8 years of 1 sample every second.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
---
 tsdb/head_append.go   | 20 +++++++++++++-------
 tsdb/head_wal.go      |  2 +-
 tsdb/ooo_head_read.go |  2 +-
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/tsdb/head_append.go b/tsdb/head_append.go
index 59681b8da7..b66ac72788 100644
--- a/tsdb/head_append.go
+++ b/tsdb/head_append.go
@@ -19,6 +19,7 @@ import (
 	"fmt"
 	"math"
 
+	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
 
 	"github.com/prometheus/prometheus/model/exemplar"
@@ -936,7 +937,7 @@ func (a *headAppender) Commit() (err error) {
 			// Sample is OOO and OOO handling is enabled
 			// and the delta is within the OOO tolerance.
 			var mmapRefs []chunks.ChunkDiskMapperRef
-			ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, oooCapMax)
+			ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, oooCapMax, a.head.logger)
 			if chunkCreated {
 				r, ok := oooMmapMarkers[series.ref]
 				if !ok || r != nil {
@@ -1083,14 +1084,14 @@ func (a *headAppender) Commit() (err error) {
 }
 
 // insert is like append, except it inserts. Used for OOO samples.
-func (s *memSeries) insert(t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, chunkDiskMapper *chunks.ChunkDiskMapper, oooCapMax int64) (inserted, chunkCreated bool, mmapRefs []chunks.ChunkDiskMapperRef) {
+func (s *memSeries) insert(t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, chunkDiskMapper *chunks.ChunkDiskMapper, oooCapMax int64, logger log.Logger) (inserted, chunkCreated bool, mmapRefs []chunks.ChunkDiskMapperRef) {
 	if s.ooo == nil {
 		s.ooo = &memSeriesOOOFields{}
 	}
 	c := s.ooo.oooHeadChunk
 	if c == nil || c.chunk.NumSamples() == int(oooCapMax) {
 		// Note: If no new samples come in then we rely on compaction to clean up stale in-memory OOO chunks.
-		c, mmapRefs = s.cutNewOOOHeadChunk(t, chunkDiskMapper)
+		c, mmapRefs = s.cutNewOOOHeadChunk(t, chunkDiskMapper, logger)
 		chunkCreated = true
 	}
 
@@ -1444,9 +1445,9 @@ func (s *memSeries) cutNewHeadChunk(mint int64, e chunkenc.Encoding, chunkRange
 }
 
 // cutNewOOOHeadChunk cuts a new OOO chunk and m-maps the old chunk.
-// The caller must ensure that s.ooo is not nil.
-func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.ChunkDiskMapper) (*oooHeadChunk, []chunks.ChunkDiskMapperRef) {
-	ref := s.mmapCurrentOOOHeadChunk(chunkDiskMapper)
+// The caller must ensure that s is locked and s.ooo is not nil.
+func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.ChunkDiskMapper, logger log.Logger) (*oooHeadChunk, []chunks.ChunkDiskMapperRef) {
+	ref := s.mmapCurrentOOOHeadChunk(chunkDiskMapper, logger)
 
 	s.ooo.oooHeadChunk = &oooHeadChunk{
 		chunk:   NewOOOChunk(),
@@ -1457,7 +1458,8 @@ func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.Chunk
 	return s.ooo.oooHeadChunk, ref
 }
 
-func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMapper) []chunks.ChunkDiskMapperRef {
+// s must be locked when calling.
+func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMapper, logger log.Logger) []chunks.ChunkDiskMapperRef {
 	if s.ooo == nil || s.ooo.oooHeadChunk == nil {
 		// OOO is not enabled or there is no head chunk, so nothing to m-map here.
 		return nil
@@ -1469,6 +1471,10 @@ func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMap
 	}
 	chunkRefs := make([]chunks.ChunkDiskMapperRef, 0, 1)
 	for _, memchunk := range chks {
+		if len(s.ooo.oooMmappedChunks) >= (oooChunkIDMask - 1) {
+			level.Error(logger).Log("msg", "Too many OOO chunks, dropping data", "series", s.lset.String())
+			break
+		}
 		chunkRef := chunkDiskMapper.WriteChunk(s.ref, s.ooo.oooHeadChunk.minTime, s.ooo.oooHeadChunk.maxTime, memchunk.chunk, true, handleChunkWriteError)
 		chunkRefs = append(chunkRefs, chunkRef)
 		s.ooo.oooMmappedChunks = append(s.ooo.oooMmappedChunks, &mmappedChunk{
diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go
index 85b0c656de..7397bbf413 100644
--- a/tsdb/head_wal.go
+++ b/tsdb/head_wal.go
@@ -890,7 +890,7 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs uint64) {
 				unknownRefs++
 				continue
 			}
-			ok, chunkCreated, _ := ms.insert(s.T, s.V, nil, nil, h.chunkDiskMapper, oooCapMax)
+			ok, chunkCreated, _ := ms.insert(s.T, s.V, nil, nil, h.chunkDiskMapper, oooCapMax, h.logger)
 			if chunkCreated {
 				h.metrics.chunksCreated.Inc()
 				h.metrics.chunks.Inc()
diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index 47e2efb866..55e241fd90 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -340,7 +340,7 @@ func NewOOOCompactionHead(ctx context.Context, head *Head) (*OOOCompactionHead,
 		}
 
 		var lastMmapRef chunks.ChunkDiskMapperRef
-		mmapRefs := ms.mmapCurrentOOOHeadChunk(head.chunkDiskMapper)
+		mmapRefs := ms.mmapCurrentOOOHeadChunk(head.chunkDiskMapper, head.logger)
 		if len(mmapRefs) == 0 && len(ms.ooo.oooMmappedChunks) > 0 {
 			// Nothing was m-mapped. So take the mmapRef from the existing slice if it exists.
 			mmapRefs = []chunks.ChunkDiskMapperRef{ms.ooo.oooMmappedChunks[len(ms.ooo.oooMmappedChunks)-1].ref}

From 1800af54f0dce09c37fba6d7f61e14c9621b8321 Mon Sep 17 00:00:00 2001
From: cuiweiyuan <cuiweiyuan@aliyun.com.>
Date: Thu, 8 Aug 2024 15:08:10 +0800
Subject: [PATCH 31/51] chore: fix some function names

Signed-off-by: cuiweiyuan <cuiweiyuan@aliyun.com>
---
 discovery/kubernetes/endpoints_test.go | 2 +-
 promql/engine.go                       | 2 +-
 storage/series_test.go                 | 2 +-
 tsdb/head_test.go                      | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/discovery/kubernetes/endpoints_test.go b/discovery/kubernetes/endpoints_test.go
index e877657dba..3ea98c5db9 100644
--- a/discovery/kubernetes/endpoints_test.go
+++ b/discovery/kubernetes/endpoints_test.go
@@ -970,7 +970,7 @@ func TestEndpointsDiscoveryEmptyPodStatus(t *testing.T) {
 	}.Run(t)
 }
 
-// TestEndpointsUpdatePod makes sure that Endpoints discovery detects underlying Pods changes.
+// TestEndpointsDiscoveryUpdatePod makes sure that Endpoints discovery detects underlying Pods changes.
 // See https://github.com/prometheus/prometheus/issues/11305 for more details.
 func TestEndpointsDiscoveryUpdatePod(t *testing.T) {
 	pod := &v1.Pod{
diff --git a/promql/engine.go b/promql/engine.go
index 1427302e5e..a118672cf3 100644
--- a/promql/engine.go
+++ b/promql/engine.go
@@ -3161,7 +3161,7 @@ seriesLoop:
 	return mat, annos
 }
 
-// aggregationK evaluates count_values on vec.
+// aggregationCountValues evaluates count_values on vec.
 // Outputs as many series per group as there are values in the input.
 func (ev *evaluator) aggregationCountValues(e *parser.AggregateExpr, grouping []string, valueLabel string, vec Vector, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	type groupCount struct {
diff --git a/storage/series_test.go b/storage/series_test.go
index 51886f409b..f8ba2af67c 100644
--- a/storage/series_test.go
+++ b/storage/series_test.go
@@ -72,7 +72,7 @@ func TestListSeriesIterator(t *testing.T) {
 	require.Equal(t, chunkenc.ValNone, it.Seek(2))
 }
 
-// TestSeriesSetToChunkSet test the property of SeriesSet that says
+// TestChunkSeriesSetToSeriesSet tests the property of SeriesSet that says
 // returned series should be iterable even after Next is called.
 func TestChunkSeriesSetToSeriesSet(t *testing.T) {
 	series := []struct {
diff --git a/tsdb/head_test.go b/tsdb/head_test.go
index fb73a36385..f73ce38ba7 100644
--- a/tsdb/head_test.go
+++ b/tsdb/head_test.go
@@ -5695,7 +5695,7 @@ func TestCuttingNewHeadChunks(t *testing.T) {
 	}
 }
 
-// TestHeadDetectsDuplcateSampleAtSizeLimit tests a regression where a duplicate sample
+// TestHeadDetectsDuplicateSampleAtSizeLimit tests a regression where a duplicate sample
 // is appended to the head, right when the head chunk is at the size limit.
 // The test adds all samples as duplicate, thus expecting that the result has
 // exactly half of the samples.

From 9e7308de386a23a5d3c8fd2ba28ca7aa55606bc7 Mon Sep 17 00:00:00 2001
From: Owen Williams <owen.williams@grafana.com>
Date: Thu, 18 Jul 2024 14:08:21 -0400
Subject: [PATCH 32/51] feat(utf8): utf8 content negotiation and flags

Signed-off-by: Owen Williams <owen.williams@grafana.com>
---
 cmd/prometheus/main.go              | 17 ++++++++++++++-
 config/config.go                    | 21 ++++++++++++++++++
 docs/command-line/prometheus.md     |  3 ++-
 docs/configuration/configuration.md | 10 +++++++++
 docs/feature_flags.md               |  8 +++++++
 scrape/manager.go                   |  2 ++
 scrape/scrape.go                    | 23 ++++++++++++++++----
 scrape/scrape_test.go               | 33 ++++++++++++++++++++++-------
 8 files changed, 103 insertions(+), 14 deletions(-)

diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go
index 51320c6611..94924d2c4e 100644
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@@ -152,6 +152,7 @@ type flagConfig struct {
 	queryConcurrency    int
 	queryMaxSamples     int
 	RemoteFlushDeadline model.Duration
+	nameEscapingScheme  string
 
 	featureList   []string
 	memlimitRatio float64
@@ -237,6 +238,9 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
 			case "delayed-compaction":
 				c.tsdb.EnableDelayedCompaction = true
 				level.Info(logger).Log("msg", "Experimental delayed compaction is enabled.")
+			case "utf8-names":
+				model.NameValidationScheme = model.UTF8Validation
+				level.Info(logger).Log("msg", "Experimental UTF-8 support enabled")
 			case "":
 				continue
 			case "promql-at-modifier", "promql-negative-offset":
@@ -481,7 +485,9 @@ func main() {
 	a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
 		Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
 
-	a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
+	a.Flag("scrape.name-escaping-scheme", `Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots".`).Default(scrape.DefaultNameEscapingScheme.String()).StringVar(&cfg.nameEscapingScheme)
+
+	a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
 		Default("").StringsVar(&cfg.featureList)
 
 	promlogflag.AddFlags(a, &cfg.promlogConfig)
@@ -509,6 +515,15 @@ func main() {
 		os.Exit(1)
 	}
 
+	if cfg.nameEscapingScheme != "" {
+		scheme, err := model.ToEscapingScheme(cfg.nameEscapingScheme)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, `Invalid name escaping scheme: %q; Needs to be one of "values", "underscores", or "dots"`, cfg.nameEscapingScheme)
+			os.Exit(1)
+		}
+		model.NameEscapingScheme = scheme
+	}
+
 	if agentMode && len(serverOnlyFlags) > 0 {
 		fmt.Fprintf(os.Stderr, "The following flag(s) can not be used in agent mode: %q", serverOnlyFlags)
 		os.Exit(3)
diff --git a/config/config.go b/config/config.go
index 7632a444fe..4326b0a992 100644
--- a/config/config.go
+++ b/config/config.go
@@ -67,6 +67,11 @@ var (
 	}
 )
 
+const (
+	LegacyValidationConfig = "legacy"
+	UTF8ValidationConfig   = "utf8"
+)
+
 // Load parses the YAML input s into a Config.
 func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, error) {
 	cfg := &Config{}
@@ -446,6 +451,8 @@ type GlobalConfig struct {
 	// Keep no more than this many dropped targets per job.
 	// 0 means no limit.
 	KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
+	// Validation scheme for metric and label names: "legacy" (or empty) or "utf8".
+	MetricNameValidationScheme string `yaml:"metric_name_validation_scheme,omitempty"`
 }
 
 // ScrapeProtocol represents supported protocol for scraping metrics.
@@ -471,6 +478,7 @@ var (
 	PrometheusText0_0_4  ScrapeProtocol = "PrometheusText0.0.4"
 	OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1"
 	OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0"
+	UTF8NamesHeader      string         = model.EscapingKey + "=" + model.AllowUTF8
 
 	ScrapeProtocolsHeaders = map[ScrapeProtocol]string{
 		PrometheusProto:      "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited",
@@ -656,6 +664,8 @@ type ScrapeConfig struct {
 	// Keep no more than this many dropped targets per job.
 	// 0 means no limit.
 	KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
+	// Validation scheme for metric and label names: "legacy" (or empty) or "utf8".
+	MetricNameValidationScheme string `yaml:"metric_name_validation_scheme,omitempty"`
 
 	// We cannot do proper Go type embedding below as the parser will then parse
 	// values arbitrarily into the overflow maps of further-down types.
@@ -762,6 +772,17 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
 		return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName)
 	}
 
+	switch globalConfig.MetricNameValidationScheme {
+	case "", LegacyValidationConfig:
+	case UTF8ValidationConfig:
+		if model.NameValidationScheme != model.UTF8Validation {
+			return fmt.Errorf("utf8 name validation requested but feature not enabled via --enable-feature=utf8-names")
+		}
+	default:
+		return fmt.Errorf("unknown name validation method specified, must be either 'legacy' or 'utf8', got %s", globalConfig.MetricNameValidationScheme)
+	}
+	c.MetricNameValidationScheme = globalConfig.MetricNameValidationScheme
+
 	return nil
 }
 
diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md
index b8f2e4241f..a16e807e1c 100644
--- a/docs/command-line/prometheus.md
+++ b/docs/command-line/prometheus.md
@@ -56,7 +56,8 @@ The Prometheus monitoring server
 | <code class="text-nowrap">--query.timeout</code> | Maximum time a query may take before being aborted. Use with server mode only. | `2m` |
 | <code class="text-nowrap">--query.max-concurrency</code> | Maximum number of queries executed concurrently. Use with server mode only. | `20` |
 | <code class="text-nowrap">--query.max-samples</code> | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` |
-| <code class="text-nowrap">--enable-feature</code> | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. |  |
+| <code class="text-nowrap">--scrape.name-escaping-scheme</code> | Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots". | `values` |
+| <code class="text-nowrap">--enable-feature</code> | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. |  |
 | <code class="text-nowrap">--log.level</code> | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` |
 | <code class="text-nowrap">--log.format</code> | Output format of log messages. One of: [logfmt, json] | `logfmt` |
 
diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md
index 313a7f2f37..bc684b6f98 100644
--- a/docs/configuration/configuration.md
+++ b/docs/configuration/configuration.md
@@ -121,6 +121,11 @@ global:
   # that will be kept in memory. 0 means no limit.
   [ keep_dropped_targets: <int> | default = 0 ]
 
+  # Specifies the validation scheme for metric and label names. Either blank or
+  # "legacy" for letters, numbers, colons, and underscores; or "utf8" for full
+  # UTF-8 support.
+  [ metric_name_validation_scheme: <string> | default = "legacy" ]
+
 runtime:
   # Configure the Go garbage collector GOGC parameter
   # See: https://tip.golang.org/doc/gc-guide#GOGC
@@ -461,6 +466,11 @@ metric_relabel_configs:
 # that will be kept in memory. 0 means no limit.
 [ keep_dropped_targets: <int> | default = 0 ]
 
+# Specifies the validation scheme for metric and label names. Either blank or
+# "legacy" for letters, numbers, colons, and underscores; or "utf8" for full
+# UTF-8 support.
+[ metric_name_validation_scheme: <string> | default = "legacy" ]
+
 # Limit on total number of positive and negative buckets allowed in a single
 # native histogram. The resolution of a histogram with more buckets will be
 # reduced until the number of buckets is within the limit. If the limit cannot
diff --git a/docs/feature_flags.md b/docs/feature_flags.md
index c9a3558fa6..0a908bb91d 100644
--- a/docs/feature_flags.md
+++ b/docs/feature_flags.md
@@ -249,3 +249,11 @@ In the event of multiple consecutive Head compactions being possible, only the f
 Note that during this delay, the Head continues its usual operations, which include serving and appending series.
 
 Despite the delay in compaction, the blocks produced are time-aligned in the same manner as they would be if the delay was not in place.
+
+## UTF-8 Name Support
+
+`--enable-feature=utf8-names`
+
+When enabled, changes the metric and label name validation scheme inside Prometheus to allow the full UTF-8 character set.
+By itself, this flag does not enable the request of UTF-8 names via content negotiation.
+Users will also have to set `metric_name_validation_scheme` in scrape configs to enable the feature either on the global config or on a per-scrape config basis.
diff --git a/scrape/manager.go b/scrape/manager.go
index 156e949f83..6d4e8707bb 100644
--- a/scrape/manager.go
+++ b/scrape/manager.go
@@ -93,6 +93,8 @@ type Options struct {
 	skipOffsetting bool
 }
 
+const DefaultNameEscapingScheme = model.ValueEncodingEscaping
+
 // Manager maintains a set of scrape pools and manages start/stop cycles
 // when receiving new target groups from the discovery manager.
 type Manager struct {
diff --git a/scrape/scrape.go b/scrape/scrape.go
index 68411a62e0..9979f7361c 100644
--- a/scrape/scrape.go
+++ b/scrape/scrape.go
@@ -303,6 +303,11 @@ func (sp *scrapePool) restartLoops(reuseCache bool) {
 		mrc                      = sp.config.MetricRelabelConfigs
 	)
 
+	validationScheme := model.LegacyValidation
+	if sp.config.MetricNameValidationScheme == config.UTF8ValidationConfig {
+		validationScheme = model.UTF8Validation
+	}
+
 	sp.targetMtx.Lock()
 
 	forcedErr := sp.refreshTargetLimitErr()
@@ -323,7 +328,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) {
 				client:               sp.client,
 				timeout:              timeout,
 				bodySizeLimit:        bodySizeLimit,
-				acceptHeader:         acceptHeader(sp.config.ScrapeProtocols),
+				acceptHeader:         acceptHeader(sp.config.ScrapeProtocols, validationScheme),
 				acceptEncodingHeader: acceptEncodingHeader(enableCompression),
 			}
 			newLoop = sp.newLoop(scrapeLoopOptions{
@@ -452,6 +457,11 @@ func (sp *scrapePool) sync(targets []*Target) {
 		scrapeClassicHistograms  = sp.config.ScrapeClassicHistograms
 	)
 
+	validationScheme := model.LegacyValidation
+	if sp.config.MetricNameValidationScheme == config.UTF8ValidationConfig {
+		validationScheme = model.UTF8Validation
+	}
+
 	sp.targetMtx.Lock()
 	for _, t := range targets {
 		hash := t.hash()
@@ -467,7 +477,7 @@ func (sp *scrapePool) sync(targets []*Target) {
 				client:               sp.client,
 				timeout:              timeout,
 				bodySizeLimit:        bodySizeLimit,
-				acceptHeader:         acceptHeader(sp.config.ScrapeProtocols),
+				acceptHeader:         acceptHeader(sp.config.ScrapeProtocols, validationScheme),
 				acceptEncodingHeader: acceptEncodingHeader(enableCompression),
 				metrics:              sp.metrics,
 			}
@@ -714,11 +724,16 @@ var errBodySizeLimit = errors.New("body size limit exceeded")
 // acceptHeader transforms preference from the options into specific header values as
 // https://www.rfc-editor.org/rfc/rfc9110.html#name-accept defines.
 // No validation is here, we expect scrape protocols to be validated already.
-func acceptHeader(sps []config.ScrapeProtocol) string {
+func acceptHeader(sps []config.ScrapeProtocol, scheme model.ValidationScheme) string {
 	var vals []string
 	weight := len(config.ScrapeProtocolsHeaders) + 1
 	for _, sp := range sps {
-		vals = append(vals, fmt.Sprintf("%s;q=0.%d", config.ScrapeProtocolsHeaders[sp], weight))
+		val := config.ScrapeProtocolsHeaders[sp]
+		if scheme == model.UTF8Validation {
+			val += ";" + config.UTF8NamesHeader
+		}
+		val += fmt.Sprintf(";q=0.%d", weight)
+		vals = append(vals, val)
 		weight--
 	}
 	// Default match anything.
diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go
index a3fe6ac1a5..be81b8677c 100644
--- a/scrape/scrape_test.go
+++ b/scrape/scrape_test.go
@@ -2339,11 +2339,15 @@ func TestTargetScraperScrapeOK(t *testing.T) {
 	)
 
 	var protobufParsing bool
+	var allowUTF8 bool
 
 	server := httptest.NewServer(
 		http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			accept := r.Header.Get("Accept")
+			if allowUTF8 {
+				require.Truef(t, strings.Contains(accept, "escaping=allow-utf-8"), "Expected Accept header to allow utf8, got %q", accept)
+			}
 			if protobufParsing {
-				accept := r.Header.Get("Accept")
 				require.True(t, strings.HasPrefix(accept, "application/vnd.google.protobuf;"),
 					"Expected Accept header to prefer application/vnd.google.protobuf.")
 			}
@@ -2351,7 +2355,11 @@ func TestTargetScraperScrapeOK(t *testing.T) {
 			timeout := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds")
 			require.Equal(t, expectedTimeout, timeout, "Expected scrape timeout header.")
 
-			w.Header().Set("Content-Type", `text/plain; version=0.0.4`)
+			if allowUTF8 {
+				w.Header().Set("Content-Type", `text/plain; version=1.0.0; escaping=allow-utf-8`)
+			} else {
+				w.Header().Set("Content-Type", `text/plain; version=0.0.4`)
+			}
 			w.Write([]byte("metric_a 1\nmetric_b 2\n"))
 		}),
 	)
@@ -2380,13 +2388,22 @@ func TestTargetScraperScrapeOK(t *testing.T) {
 		require.NoError(t, err)
 		contentType, err := ts.readResponse(context.Background(), resp, &buf)
 		require.NoError(t, err)
-		require.Equal(t, "text/plain; version=0.0.4", contentType)
+		if allowUTF8 {
+			require.Equal(t, "text/plain; version=1.0.0; escaping=allow-utf-8", contentType)
+		} else {
+			require.Equal(t, "text/plain; version=0.0.4", contentType)
+		}
 		require.Equal(t, "metric_a 1\nmetric_b 2\n", buf.String())
 	}
 
-	runTest(acceptHeader(config.DefaultScrapeProtocols))
+	runTest(acceptHeader(config.DefaultScrapeProtocols, model.LegacyValidation))
 	protobufParsing = true
-	runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols))
+	runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols, model.LegacyValidation))
+	protobufParsing = false
+	allowUTF8 = true
+	runTest(acceptHeader(config.DefaultScrapeProtocols, model.UTF8Validation))
+	protobufParsing = true
+	runTest(acceptHeader(config.DefaultProtoFirstScrapeProtocols, model.UTF8Validation))
 }
 
 func TestTargetScrapeScrapeCancel(t *testing.T) {
@@ -2412,7 +2429,7 @@ func TestTargetScrapeScrapeCancel(t *testing.T) {
 			),
 		},
 		client:       http.DefaultClient,
-		acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols),
+		acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.LegacyValidation),
 	}
 	ctx, cancel := context.WithCancel(context.Background())
 
@@ -2467,7 +2484,7 @@ func TestTargetScrapeScrapeNotFound(t *testing.T) {
 			),
 		},
 		client:       http.DefaultClient,
-		acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols),
+		acceptHeader: acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.LegacyValidation),
 	}
 
 	resp, err := ts.scrape(context.Background())
@@ -2511,7 +2528,7 @@ func TestTargetScraperBodySizeLimit(t *testing.T) {
 		},
 		client:        http.DefaultClient,
 		bodySizeLimit: bodySizeLimit,
-		acceptHeader:  acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols),
+		acceptHeader:  acceptHeader(config.DefaultGlobalConfig.ScrapeProtocols, model.LegacyValidation),
 		metrics:       newTestScrapeMetrics(t),
 	}
 	var buf bytes.Buffer

From 3b0a386c62513e0aaed04cd4d97beb9a9a8ab580 Mon Sep 17 00:00:00 2001
From: harshitasao <harshitasao@gmail.com>
Date: Sun, 18 Aug 2024 13:15:28 +0530
Subject: [PATCH 33/51] fix: fixed the vulnerability

Signed-off-by: harshitasao <harshitasao@gmail.com>
---
 documentation/examples/remote_storage/go.mod |  4 ++--
 documentation/examples/remote_storage/go.sum | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/documentation/examples/remote_storage/go.mod b/documentation/examples/remote_storage/go.mod
index 35dca85a07..bab39303d7 100644
--- a/documentation/examples/remote_storage/go.mod
+++ b/documentation/examples/remote_storage/go.mod
@@ -20,7 +20,7 @@ require (
 	github.com/Azure/azure-sdk-for-go/sdk/internal v1.8.0 // indirect
 	github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect
 	github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 // indirect
-	github.com/aws/aws-sdk-go v1.53.16 // indirect
+	github.com/aws/aws-sdk-go v1.55.5 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
@@ -62,7 +62,7 @@ require (
 	golang.org/x/text v0.16.0 // indirect
 	golang.org/x/time v0.5.0 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect
-	google.golang.org/grpc v1.64.0 // indirect
+	google.golang.org/grpc v1.65.0 // indirect
 	google.golang.org/protobuf v1.34.2 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
diff --git a/documentation/examples/remote_storage/go.sum b/documentation/examples/remote_storage/go.sum
index 4c420092f0..6e283cc749 100644
--- a/documentation/examples/remote_storage/go.sum
+++ b/documentation/examples/remote_storage/go.sum
@@ -26,8 +26,8 @@ github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9/go.mod h1:OMCwj8V
 github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA=
 github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4=
 github.com/aws/aws-sdk-go v1.38.35/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro=
-github.com/aws/aws-sdk-go v1.53.16 h1:8oZjKQO/ml1WLUZw5hvF7pvYjPf8o9f57Wldoy/q9Qc=
-github.com/aws/aws-sdk-go v1.53.16/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk=
+github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU=
+github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU=
 github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 h1:6df1vn4bBlDDo4tARvBm7l6KA9iVMnE3NWizDeWSrps=
 github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3/go.mod h1:CIWtjkly68+yqLPbvwwR/fjNJA/idrtULjZWh2v1ys0=
 github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
@@ -37,8 +37,8 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r
 github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/cncf/xds/go v0.0.0-20240318125728-8a4994d93e50 h1:DBmgJDC9dTfkVyGgipamEh2BpGYxScCH1TOF1LL1cXc=
-github.com/cncf/xds/go v0.0.0-20240318125728-8a4994d93e50/go.mod h1:5e1+Vvlzido69INQaVO6d87Qn543Xr6nooe9Kz7oBFM=
+github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b h1:ga8SEFjZ60pxLcmhnThWgvH2wg8376yUJmPhEH4H3kw=
+github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
@@ -402,8 +402,8 @@ google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 h1:
 google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157/go.mod h1:99sLkeliLXfdj2J75X3Ho+rrVCaJze0uwN7zDDkjPVU=
 google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 h1:Zy9XzmMEflZ/MAaA7vNcoebnRAld7FsPW1EeBB7V0m8=
 google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0=
-google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY=
-google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg=
+google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc=
+google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ=
 google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
 google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
 google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=

From 3a78e76282d47887a253a0fd19524ca49c4cabce Mon Sep 17 00:00:00 2001
From: Arve Knudsen <arve.knudsen@gmail.com>
Date: Sun, 18 Aug 2024 11:27:04 +0200
Subject: [PATCH 34/51] Upgrade golangci-lint to v1.60.1

Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
---
 .github/workflows/ci.yml                |   2 +-
 Makefile.common                         |   2 +-
 cmd/promtool/tsdb.go                    |   8 +-
 discovery/eureka/client.go              |   1 -
 discovery/hetzner/robot.go              |   1 -
 discovery/kubernetes/kubernetes_test.go |   8 +-
 notifier/notifier.go                    |   2 +-
 promql/functions.go                     |  32 +++----
 promql/parser/lex.go                    |  12 +--
 scripts/golangci-lint.yml               |   2 +-
 storage/buffer_test.go                  |  16 ++--
 storage/remote/client.go                |   3 -
 storage/remote/queue_manager.go         |   8 +-
 storage/remote/write_handler_test.go    |   8 +-
 template/template.go                    |   2 +-
 tsdb/chunks/queue_test.go               |   8 +-
 tsdb/db.go                              |  16 ++--
 tsdb/db_test.go                         | 108 ++++++++++++------------
 tsdb/head_append.go                     |   8 +-
 tsdb/head_test.go                       |   4 +-
 tsdb/index/postingsstats_test.go        |  16 ++--
 tsdb/wlog/wlog.go                       |  10 +--
 util/testutil/directory.go              |   2 +-
 web/web.go                              |   4 +-
 24 files changed, 139 insertions(+), 144 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 92bbaebbfd..c3a1d68e98 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -186,7 +186,7 @@ jobs:
         with:
           args: --verbose
           # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml.
-          version: v1.59.1
+          version: v1.60.1
   fuzzing:
     uses: ./.github/workflows/fuzzing.yml
     if: github.event_name == 'pull_request'
diff --git a/Makefile.common b/Makefile.common
index e3da72ab47..2ecd5465c3 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -61,7 +61,7 @@ PROMU_URL     := https://github.com/prometheus/promu/releases/download/v$(PROMU_
 SKIP_GOLANGCI_LINT :=
 GOLANGCI_LINT :=
 GOLANGCI_LINT_OPTS ?=
-GOLANGCI_LINT_VERSION ?= v1.59.1
+GOLANGCI_LINT_VERSION ?= v1.60.1
 # golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64.
 # windows isn't included here because of the path separator being different.
 ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go
index 2ed7244b1c..b85a4fae8b 100644
--- a/cmd/promtool/tsdb.go
+++ b/cmd/promtool/tsdb.go
@@ -866,16 +866,16 @@ func displayHistogram(dataType string, datas []int, total int) {
 	fmt.Println()
 }
 
-func generateBucket(min, max int) (start, end, step int) {
-	s := (max - min) / 10
+func generateBucket(minVal, maxVal int) (start, end, step int) {
+	s := (maxVal - minVal) / 10
 
 	step = 10
 	for step < s && step <= 10000 {
 		step *= 10
 	}
 
-	start = min - min%step
-	end = max - max%step + step
+	start = minVal - minVal%step
+	end = maxVal - maxVal%step + step
 
 	return
 }
diff --git a/discovery/eureka/client.go b/discovery/eureka/client.go
index 5a90968f1b..52e8ce7b48 100644
--- a/discovery/eureka/client.go
+++ b/discovery/eureka/client.go
@@ -97,7 +97,6 @@ func fetchApps(ctx context.Context, server string, client *http.Client) (*Applic
 		resp.Body.Close()
 	}()
 
-	//nolint:usestdlibvars
 	if resp.StatusCode/100 != 2 {
 		return nil, fmt.Errorf("non 2xx status '%d' response during eureka service discovery", resp.StatusCode)
 	}
diff --git a/discovery/hetzner/robot.go b/discovery/hetzner/robot.go
index 64155bfaed..516470b05a 100644
--- a/discovery/hetzner/robot.go
+++ b/discovery/hetzner/robot.go
@@ -87,7 +87,6 @@ func (d *robotDiscovery) refresh(context.Context) ([]*targetgroup.Group, error)
 		resp.Body.Close()
 	}()
 
-	//nolint:usestdlibvars
 	if resp.StatusCode/100 != 2 {
 		return nil, fmt.Errorf("non 2xx status '%d' response during hetzner service discovery with role robot", resp.StatusCode)
 	}
diff --git a/discovery/kubernetes/kubernetes_test.go b/discovery/kubernetes/kubernetes_test.go
index 552f8a4453..50f25a20ab 100644
--- a/discovery/kubernetes/kubernetes_test.go
+++ b/discovery/kubernetes/kubernetes_test.go
@@ -154,7 +154,7 @@ func (d k8sDiscoveryTest) Run(t *testing.T) {
 
 // readResultWithTimeout reads all targetgroups from channel with timeout.
 // It merges targetgroups by source and sends the result to result channel.
-func readResultWithTimeout(t *testing.T, ctx context.Context, ch <-chan []*targetgroup.Group, max int, stopAfter time.Duration, resChan chan<- map[string]*targetgroup.Group) {
+func readResultWithTimeout(t *testing.T, ctx context.Context, ch <-chan []*targetgroup.Group, maxGroups int, stopAfter time.Duration, resChan chan<- map[string]*targetgroup.Group) {
 	res := make(map[string]*targetgroup.Group)
 	timeout := time.After(stopAfter)
 Loop:
@@ -167,7 +167,7 @@ Loop:
 				}
 				res[tg.Source] = tg
 			}
-			if len(res) == max {
+			if len(res) == maxGroups {
 				// Reached max target groups we may get, break fast.
 				break Loop
 			}
@@ -175,10 +175,10 @@ Loop:
 			// Because we use queue, an object that is created then
 			// deleted or updated may be processed only once.
 			// So possibly we may skip events, timed out here.
-			t.Logf("timed out, got %d (max: %d) items, some events are skipped", len(res), max)
+			t.Logf("timed out, got %d (max: %d) items, some events are skipped", len(res), maxGroups)
 			break Loop
 		case <-ctx.Done():
-			t.Logf("stopped, got %d (max: %d) items", len(res), max)
+			t.Logf("stopped, got %d (max: %d) items", len(res), maxGroups)
 			break Loop
 		}
 	}
diff --git a/notifier/notifier.go b/notifier/notifier.go
index 68b0d4961e..2ebfbbee59 100644
--- a/notifier/notifier.go
+++ b/notifier/notifier.go
@@ -674,7 +674,7 @@ func (n *Manager) sendOne(ctx context.Context, c *http.Client, url string, b []b
 	}()
 
 	// Any HTTP status 2xx is OK.
-	//nolint:usestdlibvars
+
 	if resp.StatusCode/100 != 2 {
 		return fmt.Errorf("bad response status %s", resp.Status)
 	}
diff --git a/promql/functions.go b/promql/functions.go
index 018023bf02..2af06c174e 100644
--- a/promql/functions.go
+++ b/promql/functions.go
@@ -467,15 +467,15 @@ func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *Eval
 // === clamp(Vector parser.ValueTypeVector, min, max Scalar) (Vector, Annotations) ===
 func funcClamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	vec := vals[0].(Vector)
-	min := vals[1].(Vector)[0].F
-	max := vals[2].(Vector)[0].F
-	if max < min {
+	minVal := vals[1].(Vector)[0].F
+	maxVal := vals[2].(Vector)[0].F
+	if maxVal < minVal {
 		return enh.Out, nil
 	}
 	for _, el := range vec {
 		enh.Out = append(enh.Out, Sample{
 			Metric: el.Metric.DropMetricName(),
-			F:      math.Max(min, math.Min(max, el.F)),
+			F:      math.Max(minVal, math.Min(maxVal, el.F)),
 		})
 	}
 	return enh.Out, nil
@@ -484,11 +484,11 @@ func funcClamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper
 // === clamp_max(Vector parser.ValueTypeVector, max Scalar) (Vector, Annotations) ===
 func funcClampMax(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	vec := vals[0].(Vector)
-	max := vals[1].(Vector)[0].F
+	maxVal := vals[1].(Vector)[0].F
 	for _, el := range vec {
 		enh.Out = append(enh.Out, Sample{
 			Metric: el.Metric.DropMetricName(),
-			F:      math.Min(max, el.F),
+			F:      math.Min(maxVal, el.F),
 		})
 	}
 	return enh.Out, nil
@@ -497,11 +497,11 @@ func funcClampMax(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel
 // === clamp_min(Vector parser.ValueTypeVector, min Scalar) (Vector, Annotations) ===
 func funcClampMin(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	vec := vals[0].(Vector)
-	min := vals[1].(Vector)[0].F
+	minVal := vals[1].(Vector)[0].F
 	for _, el := range vec {
 		enh.Out = append(enh.Out, Sample{
 			Metric: el.Metric.DropMetricName(),
-			F:      math.Max(min, el.F),
+			F:      math.Max(minVal, el.F),
 		})
 	}
 	return enh.Out, nil
@@ -700,13 +700,13 @@ func funcMaxOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNode
 		return enh.Out, nil
 	}
 	return aggrOverTime(vals, enh, func(s Series) float64 {
-		max := s.Floats[0].F
+		maxVal := s.Floats[0].F
 		for _, f := range s.Floats {
-			if f.F > max || math.IsNaN(max) {
-				max = f.F
+			if f.F > maxVal || math.IsNaN(maxVal) {
+				maxVal = f.F
 			}
 		}
-		return max
+		return maxVal
 	}), nil
 }
 
@@ -720,13 +720,13 @@ func funcMinOverTime(vals []parser.Value, args parser.Expressions, enh *EvalNode
 		return enh.Out, nil
 	}
 	return aggrOverTime(vals, enh, func(s Series) float64 {
-		min := s.Floats[0].F
+		minVal := s.Floats[0].F
 		for _, f := range s.Floats {
-			if f.F < min || math.IsNaN(min) {
-				min = f.F
+			if f.F < minVal || math.IsNaN(minVal) {
+				minVal = f.F
 			}
 		}
-		return min
+		return minVal
 	}), nil
 }
 
diff --git a/promql/parser/lex.go b/promql/parser/lex.go
index 9b88ab5565..0cefa30c8f 100644
--- a/promql/parser/lex.go
+++ b/promql/parser/lex.go
@@ -727,23 +727,23 @@ func lexValueSequence(l *Lexer) stateFn {
 // was only modified to integrate with our lexer.
 func lexEscape(l *Lexer) stateFn {
 	var n int
-	var base, max uint32
+	var base, maxVal uint32
 
 	ch := l.next()
 	switch ch {
 	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen:
 		return lexString
 	case '0', '1', '2', '3', '4', '5', '6', '7':
-		n, base, max = 3, 8, 255
+		n, base, maxVal = 3, 8, 255
 	case 'x':
 		ch = l.next()
-		n, base, max = 2, 16, 255
+		n, base, maxVal = 2, 16, 255
 	case 'u':
 		ch = l.next()
-		n, base, max = 4, 16, unicode.MaxRune
+		n, base, maxVal = 4, 16, unicode.MaxRune
 	case 'U':
 		ch = l.next()
-		n, base, max = 8, 16, unicode.MaxRune
+		n, base, maxVal = 8, 16, unicode.MaxRune
 	case eof:
 		l.errorf("escape sequence not terminated")
 		return lexString
@@ -772,7 +772,7 @@ func lexEscape(l *Lexer) stateFn {
 		}
 	}
 
-	if x > max || 0xD800 <= x && x < 0xE000 {
+	if x > maxVal || 0xD800 <= x && x < 0xE000 {
 		l.errorf("escape sequence is an invalid Unicode code point")
 	}
 	return lexString
diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml
index 746831a86a..fc0f9c6543 100644
--- a/scripts/golangci-lint.yml
+++ b/scripts/golangci-lint.yml
@@ -36,4 +36,4 @@ jobs:
         uses: golangci/golangci-lint-action@aaa42aa0628b4ae2578232a66b541047968fac86 # v6.1.0
         with:
           args: --verbose
-          version: v1.59.1
+          version: v1.60.1
diff --git a/storage/buffer_test.go b/storage/buffer_test.go
index 1b24e5da22..b5c6443ac5 100644
--- a/storage/buffer_test.go
+++ b/storage/buffer_test.go
@@ -96,10 +96,10 @@ func TestSampleRingMixed(t *testing.T) {
 
 	// With ValNone as the preferred type, nothing should be initialized.
 	r := newSampleRing(10, 2, chunkenc.ValNone)
-	require.Zero(t, len(r.fBuf))
-	require.Zero(t, len(r.hBuf))
-	require.Zero(t, len(r.fhBuf))
-	require.Zero(t, len(r.iBuf))
+	require.Empty(t, r.fBuf)
+	require.Empty(t, r.hBuf)
+	require.Empty(t, r.fhBuf)
+	require.Empty(t, r.iBuf)
 
 	// But then mixed adds should work as expected.
 	r.addF(fSample{t: 1, f: 3.14})
@@ -146,10 +146,10 @@ func TestSampleRingAtFloatHistogram(t *testing.T) {
 
 	// With ValNone as the preferred type, nothing should be initialized.
 	r := newSampleRing(10, 2, chunkenc.ValNone)
-	require.Zero(t, len(r.fBuf))
-	require.Zero(t, len(r.hBuf))
-	require.Zero(t, len(r.fhBuf))
-	require.Zero(t, len(r.iBuf))
+	require.Empty(t, r.fBuf)
+	require.Empty(t, r.hBuf)
+	require.Empty(t, r.fhBuf)
+	require.Empty(t, r.iBuf)
 
 	var (
 		h  *histogram.Histogram
diff --git a/storage/remote/client.go b/storage/remote/client.go
index 11e423b6ab..2a66739ed9 100644
--- a/storage/remote/client.go
+++ b/storage/remote/client.go
@@ -287,7 +287,6 @@ func (c *Client) Store(ctx context.Context, req []byte, attempt int) (WriteRespo
 	// we can continue handling.
 	rs, _ := ParseWriteResponseStats(httpResp)
 
-	//nolint:usestdlibvars
 	if httpResp.StatusCode/100 == 2 {
 		return rs, nil
 	}
@@ -297,7 +296,6 @@ func (c *Client) Store(ctx context.Context, req []byte, attempt int) (WriteRespo
 	body, _ := io.ReadAll(io.LimitReader(httpResp.Body, maxErrMsgLen))
 	err = fmt.Errorf("server returned HTTP status %s: %s", httpResp.Status, body)
 
-	//nolint:usestdlibvars
 	if httpResp.StatusCode/100 == 5 ||
 		(c.retryOnRateLimit && httpResp.StatusCode == http.StatusTooManyRequests) {
 		return rs, RecoverableError{err, retryAfterDuration(httpResp.Header.Get("Retry-After"))}
@@ -382,7 +380,6 @@ func (c *Client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryRe
 		return nil, fmt.Errorf("error reading response. HTTP status code: %s: %w", httpResp.Status, err)
 	}
 
-	//nolint:usestdlibvars
 	if httpResp.StatusCode/100 != 2 {
 		return nil, fmt.Errorf("remote server %s returned HTTP status %s: %s", c.urlString, httpResp.Status, strings.TrimSpace(string(compressed)))
 	}
diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go
index 17ff1850fd..b1c8997268 100644
--- a/storage/remote/queue_manager.go
+++ b/storage/remote/queue_manager.go
@@ -1522,7 +1522,7 @@ func (s *shards) runShard(ctx context.Context, shardID int, queue *queue) {
 	// Send batches of at most MaxSamplesPerSend samples to the remote storage.
 	// If we have fewer samples than that, flush them out after a deadline anyways.
 	var (
-		max = s.qm.cfg.MaxSamplesPerSend
+		maxCount = s.qm.cfg.MaxSamplesPerSend
 
 		pBuf    = proto.NewBuffer(nil)
 		pBufRaw []byte
@@ -1530,19 +1530,19 @@ func (s *shards) runShard(ctx context.Context, shardID int, queue *queue) {
 	)
 	// TODO(@tpaschalis) Should we also raise the max if we have WAL metadata?
 	if s.qm.sendExemplars {
-		max += int(float64(max) * 0.1)
+		maxCount += int(float64(maxCount) * 0.1)
 	}
 
 	// TODO: Dry all of this, we should make an interface/generic for the timeseries type.
 	batchQueue := queue.Chan()
-	pendingData := make([]prompb.TimeSeries, max)
+	pendingData := make([]prompb.TimeSeries, maxCount)
 	for i := range pendingData {
 		pendingData[i].Samples = []prompb.Sample{{}}
 		if s.qm.sendExemplars {
 			pendingData[i].Exemplars = []prompb.Exemplar{{}}
 		}
 	}
-	pendingDataV2 := make([]writev2.TimeSeries, max)
+	pendingDataV2 := make([]writev2.TimeSeries, maxCount)
 	for i := range pendingDataV2 {
 		pendingDataV2[i].Samples = []writev2.Sample{{}}
 	}
diff --git a/storage/remote/write_handler_test.go b/storage/remote/write_handler_test.go
index af2229b9a2..6e1336a7aa 100644
--- a/storage/remote/write_handler_test.go
+++ b/storage/remote/write_handler_test.go
@@ -453,10 +453,10 @@ func TestRemoteWriteHandler_V2Message(t *testing.T) {
 				expectHeaderValue(t, 0, resp.Header.Get(rw20WrittenHistogramsHeader))
 				expectHeaderValue(t, 0, resp.Header.Get(rw20WrittenExemplarsHeader))
 
-				require.Empty(t, len(appendable.samples))
-				require.Empty(t, len(appendable.histograms))
-				require.Empty(t, len(appendable.exemplars))
-				require.Empty(t, len(appendable.metadata))
+				require.Empty(t, appendable.samples)
+				require.Empty(t, appendable.histograms)
+				require.Empty(t, appendable.exemplars)
+				require.Empty(t, appendable.metadata)
 				return
 			}
 
diff --git a/template/template.go b/template/template.go
index c507dbe746..9ffed6ff61 100644
--- a/template/template.go
+++ b/template/template.go
@@ -166,7 +166,7 @@ func NewTemplateExpander(
 				return html_template.HTML(text)
 			},
 			"match":     regexp.MatchString,
-			"title":     strings.Title, //nolint:staticcheck
+			"title":     strings.Title,
 			"toUpper":   strings.ToUpper,
 			"toLower":   strings.ToLower,
 			"graphLink": strutil.GraphLinkForExpression,
diff --git a/tsdb/chunks/queue_test.go b/tsdb/chunks/queue_test.go
index 9f761a5f3b..3d9275eeef 100644
--- a/tsdb/chunks/queue_test.go
+++ b/tsdb/chunks/queue_test.go
@@ -69,16 +69,16 @@ func TestQueuePushPopSingleGoroutine(t *testing.T) {
 	const maxSize = 500
 	const maxIters = 50
 
-	for max := 1; max < maxSize; max++ {
-		queue := newWriteJobQueue(max, 1+(r.Int()%max))
+	for maxCount := 1; maxCount < maxSize; maxCount++ {
+		queue := newWriteJobQueue(maxCount, 1+(r.Int()%maxCount))
 
 		elements := 0 // total elements in the queue
 		lastWriteID := 0
 		lastReadID := 0
 
 		for iter := 0; iter < maxIters; iter++ {
-			if elements < max {
-				toWrite := r.Int() % (max - elements)
+			if elements < maxCount {
+				toWrite := r.Int() % (maxCount - elements)
 				if toWrite == 0 {
 					toWrite = 1
 				}
diff --git a/tsdb/db.go b/tsdb/db.go
index 94c44161d4..706e5bbac1 100644
--- a/tsdb/db.go
+++ b/tsdb/db.go
@@ -693,7 +693,7 @@ func (db *DBReadOnly) LastBlockID() (string, error) {
 		return "", err
 	}
 
-	max := uint64(0)
+	maxT := uint64(0)
 
 	lastBlockID := ""
 
@@ -705,8 +705,8 @@ func (db *DBReadOnly) LastBlockID() (string, error) {
 			continue // Not a block dir.
 		}
 		timestamp := ulidObj.Time()
-		if timestamp > max {
-			max = timestamp
+		if timestamp > maxT {
+			maxT = timestamp
 			lastBlockID = dirName
 		}
 	}
@@ -2316,13 +2316,13 @@ func blockDirs(dir string) ([]string, error) {
 	return dirs, nil
 }
 
-func exponential(d, min, max time.Duration) time.Duration {
+func exponential(d, minD, maxD time.Duration) time.Duration {
 	d *= 2
-	if d < min {
-		d = min
+	if d < minD {
+		d = minD
 	}
-	if d > max {
-		d = max
+	if d > maxD {
+		d = maxD
 	}
 	return d
 }
diff --git a/tsdb/db_test.go b/tsdb/db_test.go
index 5943489ffd..904fdeffcb 100644
--- a/tsdb/db_test.go
+++ b/tsdb/db_test.go
@@ -4529,8 +4529,8 @@ func testOOOCompaction(t *testing.T, scenario sampleTypeScenario, addExtraSample
 
 	addSample := func(fromMins, toMins int64) {
 		app := db.Appender(context.Background())
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			_, _, err := scenario.appendFunc(app, series1, ts, ts)
 			require.NoError(t, err)
 			_, _, err = scenario.appendFunc(app, series2, ts, 2*ts)
@@ -4566,8 +4566,8 @@ func testOOOCompaction(t *testing.T, scenario sampleTypeScenario, addExtraSample
 		var series1Samples, series2Samples []chunks.Sample
 		for _, r := range [][2]int64{{90, 119}, {120, 239}, {240, highest}} {
 			fromMins, toMins := r[0], r[1]
-			for min := fromMins; min <= toMins; min++ {
-				ts := min * time.Minute.Milliseconds()
+			for m := fromMins; m <= toMins; m++ {
+				ts := m * time.Minute.Milliseconds()
 				series1Samples = append(series1Samples, scenario.sampleFunc(ts, ts))
 				series2Samples = append(series2Samples, scenario.sampleFunc(ts, 2*ts))
 			}
@@ -4645,8 +4645,8 @@ func testOOOCompaction(t *testing.T, scenario sampleTypeScenario, addExtraSample
 	verifySamples := func(block *Block, fromMins, toMins int64) {
 		series1Samples := make([]chunks.Sample, 0, toMins-fromMins+1)
 		series2Samples := make([]chunks.Sample, 0, toMins-fromMins+1)
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			series1Samples = append(series1Samples, scenario.sampleFunc(ts, ts))
 			series2Samples = append(series2Samples, scenario.sampleFunc(ts, 2*ts))
 		}
@@ -4730,8 +4730,8 @@ func testOOOCompactionWithNormalCompaction(t *testing.T, scenario sampleTypeScen
 
 	addSamples := func(fromMins, toMins int64) {
 		app := db.Appender(context.Background())
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			_, _, err := scenario.appendFunc(app, series1, ts, ts)
 			require.NoError(t, err)
 			_, _, err = scenario.appendFunc(app, series2, ts, 2*ts)
@@ -4785,8 +4785,8 @@ func testOOOCompactionWithNormalCompaction(t *testing.T, scenario sampleTypeScen
 	verifySamples := func(block *Block, fromMins, toMins int64) {
 		series1Samples := make([]chunks.Sample, 0, toMins-fromMins+1)
 		series2Samples := make([]chunks.Sample, 0, toMins-fromMins+1)
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			series1Samples = append(series1Samples, scenario.sampleFunc(ts, ts))
 			series2Samples = append(series2Samples, scenario.sampleFunc(ts, 2*ts))
 		}
@@ -4839,8 +4839,8 @@ func testOOOCompactionWithDisabledWriteLog(t *testing.T, scenario sampleTypeScen
 
 	addSamples := func(fromMins, toMins int64) {
 		app := db.Appender(context.Background())
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			_, _, err := scenario.appendFunc(app, series1, ts, ts)
 			require.NoError(t, err)
 			_, _, err = scenario.appendFunc(app, series2, ts, 2*ts)
@@ -4894,8 +4894,8 @@ func testOOOCompactionWithDisabledWriteLog(t *testing.T, scenario sampleTypeScen
 	verifySamples := func(block *Block, fromMins, toMins int64) {
 		series1Samples := make([]chunks.Sample, 0, toMins-fromMins+1)
 		series2Samples := make([]chunks.Sample, 0, toMins-fromMins+1)
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			series1Samples = append(series1Samples, scenario.sampleFunc(ts, ts))
 			series2Samples = append(series2Samples, scenario.sampleFunc(ts, 2*ts))
 		}
@@ -4948,8 +4948,8 @@ func testOOOQueryAfterRestartWithSnapshotAndRemovedWBL(t *testing.T, scenario sa
 
 	addSamples := func(fromMins, toMins int64) {
 		app := db.Appender(context.Background())
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			_, _, err := scenario.appendFunc(app, series1, ts, ts)
 			require.NoError(t, err)
 			_, _, err = scenario.appendFunc(app, series2, ts, 2*ts)
@@ -4996,8 +4996,8 @@ func testOOOQueryAfterRestartWithSnapshotAndRemovedWBL(t *testing.T, scenario sa
 	verifySamples := func(fromMins, toMins int64) {
 		series1Samples := make([]chunks.Sample, 0, toMins-fromMins+1)
 		series2Samples := make([]chunks.Sample, 0, toMins-fromMins+1)
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			series1Samples = append(series1Samples, scenario.sampleFunc(ts, ts))
 			series2Samples = append(series2Samples, scenario.sampleFunc(ts, ts*2))
 		}
@@ -5045,10 +5045,10 @@ func Test_Querier_OOOQuery(t *testing.T) {
 	addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample) ([]chunks.Sample, int) {
 		app := db.Appender(context.Background())
 		totalAppended := 0
-		for min := fromMins; min <= toMins; min += time.Minute.Milliseconds() {
-			_, err := app.Append(0, series1, min, float64(min))
-			if min >= queryMinT && min <= queryMaxT {
-				expSamples = append(expSamples, sample{t: min, f: float64(min)})
+		for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() {
+			_, err := app.Append(0, series1, m, float64(m))
+			if m >= queryMinT && m <= queryMaxT {
+				expSamples = append(expSamples, sample{t: m, f: float64(m)})
 			}
 			require.NoError(t, err)
 			totalAppended++
@@ -5129,10 +5129,10 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 	addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample) ([]chunks.Sample, int) {
 		app := db.Appender(context.Background())
 		totalAppended := 0
-		for min := fromMins; min <= toMins; min += time.Minute.Milliseconds() {
-			_, err := app.Append(0, series1, min, float64(min))
-			if min >= queryMinT && min <= queryMaxT {
-				expSamples = append(expSamples, sample{t: min, f: float64(min)})
+		for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() {
+			_, err := app.Append(0, series1, m, float64(m))
+			if m >= queryMinT && m <= queryMaxT {
+				expSamples = append(expSamples, sample{t: m, f: float64(m)})
 			}
 			require.NoError(t, err)
 			totalAppended++
@@ -5239,9 +5239,9 @@ func testOOOAppendAndQuery(t *testing.T, scenario sampleTypeScenario) {
 		app := db.Appender(context.Background())
 		key := lbls.String()
 		from, to := minutes(fromMins), minutes(toMins)
-		for min := from; min <= to; min += time.Minute.Milliseconds() {
+		for m := from; m <= to; m += time.Minute.Milliseconds() {
 			val := rand.Intn(1000)
-			_, s, err := scenario.appendFunc(app, lbls, min, int64(val))
+			_, s, err := scenario.appendFunc(app, lbls, m, int64(val))
 			if faceError {
 				require.Error(t, err)
 			} else {
@@ -5370,14 +5370,14 @@ func testOOODisabled(t *testing.T, scenario sampleTypeScenario) {
 		app := db.Appender(context.Background())
 		key := lbls.String()
 		from, to := minutes(fromMins), minutes(toMins)
-		for min := from; min <= to; min += time.Minute.Milliseconds() {
-			_, _, err := scenario.appendFunc(app, lbls, min, min)
+		for m := from; m <= to; m += time.Minute.Milliseconds() {
+			_, _, err := scenario.appendFunc(app, lbls, m, m)
 			if faceError {
 				require.Error(t, err)
 				failedSamples++
 			} else {
 				require.NoError(t, err)
-				expSamples[key] = append(expSamples[key], scenario.sampleFunc(min, min))
+				expSamples[key] = append(expSamples[key], scenario.sampleFunc(m, m))
 				totalSamples++
 			}
 		}
@@ -5444,9 +5444,9 @@ func testWBLAndMmapReplay(t *testing.T, scenario sampleTypeScenario) {
 		app := db.Appender(context.Background())
 		key := lbls.String()
 		from, to := minutes(fromMins), minutes(toMins)
-		for min := from; min <= to; min += time.Minute.Milliseconds() {
+		for m := from; m <= to; m += time.Minute.Milliseconds() {
 			val := rand.Intn(1000)
-			_, s, err := scenario.appendFunc(app, lbls, min, int64(val))
+			_, s, err := scenario.appendFunc(app, lbls, m, int64(val))
 			require.NoError(t, err)
 			expSamples[key] = append(expSamples[key], s)
 			totalSamples++
@@ -5635,8 +5635,8 @@ func testOOOCompactionFailure(t *testing.T, scenario sampleTypeScenario) {
 
 	addSample := func(fromMins, toMins int64) {
 		app := db.Appender(context.Background())
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			_, _, err := scenario.appendFunc(app, series1, ts, ts)
 			require.NoError(t, err)
 		}
@@ -5723,8 +5723,8 @@ func testOOOCompactionFailure(t *testing.T, scenario sampleTypeScenario) {
 
 	verifySamples := func(block *Block, fromMins, toMins int64) {
 		series1Samples := make([]chunks.Sample, 0, toMins-fromMins+1)
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			series1Samples = append(series1Samples, scenario.sampleFunc(ts, ts))
 		}
 		expRes := map[string][]chunks.Sample{
@@ -5772,8 +5772,8 @@ func TestWBLCorruption(t *testing.T) {
 	var allSamples, expAfterRestart []chunks.Sample
 	addSamples := func(fromMins, toMins int64, afterRestart bool) {
 		app := db.Appender(context.Background())
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			_, err := app.Append(0, series1, ts, float64(ts))
 			require.NoError(t, err)
 			allSamples = append(allSamples, sample{t: ts, f: float64(ts)})
@@ -5926,8 +5926,8 @@ func testOOOMmapCorruption(t *testing.T, scenario sampleTypeScenario) {
 	var allSamples, expInMmapChunks []chunks.Sample
 	addSamples := func(fromMins, toMins int64, inMmapAfterCorruption bool) {
 		app := db.Appender(context.Background())
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			_, s, err := scenario.appendFunc(app, series1, ts, ts)
 			require.NoError(t, err)
 			allSamples = append(allSamples, s)
@@ -6071,8 +6071,8 @@ func testOutOfOrderRuntimeConfig(t *testing.T, scenario sampleTypeScenario) {
 	series1 := labels.FromStrings("foo", "bar1")
 	addSamples := func(t *testing.T, db *DB, fromMins, toMins int64, success bool, allSamples []chunks.Sample) []chunks.Sample {
 		app := db.Appender(context.Background())
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			_, s, err := scenario.appendFunc(app, series1, ts, ts)
 			if success {
 				require.NoError(t, err)
@@ -6105,7 +6105,7 @@ func testOutOfOrderRuntimeConfig(t *testing.T, scenario sampleTypeScenario) {
 		// WBL is not empty.
 		size, err := db.head.wbl.Size()
 		require.NoError(t, err)
-		require.Greater(t, size, int64(0))
+		require.Positive(t, size)
 
 		require.Empty(t, db.Blocks())
 		require.NoError(t, db.compactOOOHead(ctx))
@@ -6282,8 +6282,8 @@ func testNoGapAfterRestartWithOOO(t *testing.T, scenario sampleTypeScenario) {
 	series1 := labels.FromStrings("foo", "bar1")
 	addSamples := func(t *testing.T, db *DB, fromMins, toMins int64, success bool) {
 		app := db.Appender(context.Background())
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			_, _, err := scenario.appendFunc(app, series1, ts, ts)
 			if success {
 				require.NoError(t, err)
@@ -6296,8 +6296,8 @@ func testNoGapAfterRestartWithOOO(t *testing.T, scenario sampleTypeScenario) {
 
 	verifySamples := func(t *testing.T, db *DB, fromMins, toMins int64) {
 		var expSamples []chunks.Sample
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			expSamples = append(expSamples, scenario.sampleFunc(ts, ts))
 		}
 
@@ -6410,8 +6410,8 @@ func testWblReplayAfterOOODisableAndRestart(t *testing.T, scenario sampleTypeSce
 	var allSamples []chunks.Sample
 	addSamples := func(fromMins, toMins int64) {
 		app := db.Appender(context.Background())
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			_, s, err := scenario.appendFunc(app, series1, ts, ts)
 			require.NoError(t, err)
 			allSamples = append(allSamples, s)
@@ -6477,8 +6477,8 @@ func testPanicOnApplyConfig(t *testing.T, scenario sampleTypeScenario) {
 	var allSamples []chunks.Sample
 	addSamples := func(fromMins, toMins int64) {
 		app := db.Appender(context.Background())
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			_, s, err := scenario.appendFunc(app, series1, ts, ts)
 			require.NoError(t, err)
 			allSamples = append(allSamples, s)
@@ -6534,8 +6534,8 @@ func testDiskFillingUpAfterDisablingOOO(t *testing.T, scenario sampleTypeScenari
 	var allSamples []chunks.Sample
 	addSamples := func(fromMins, toMins int64) {
 		app := db.Appender(context.Background())
-		for min := fromMins; min <= toMins; min++ {
-			ts := min * time.Minute.Milliseconds()
+		for m := fromMins; m <= toMins; m++ {
+			ts := m * time.Minute.Milliseconds()
 			_, s, err := scenario.appendFunc(app, series1, ts, ts)
 			require.NoError(t, err)
 			allSamples = append(allSamples, s)
diff --git a/tsdb/head_append.go b/tsdb/head_append.go
index b66ac72788..988ce9397e 100644
--- a/tsdb/head_append.go
+++ b/tsdb/head_append.go
@@ -1403,12 +1403,12 @@ func (s *memSeries) histogramsAppendPreprocessor(t int64, e chunkenc.Encoding, o
 // It assumes that the time range is 1/ratioToFull full.
 // Assuming that the samples will keep arriving at the same rate, it will make the
 // remaining n chunks within this chunk range (before max) equally sized.
-func computeChunkEndTime(start, cur, max int64, ratioToFull float64) int64 {
-	n := float64(max-start) / (float64(cur-start+1) * ratioToFull)
+func computeChunkEndTime(start, cur, maxT int64, ratioToFull float64) int64 {
+	n := float64(maxT-start) / (float64(cur-start+1) * ratioToFull)
 	if n <= 1 {
-		return max
+		return maxT
 	}
-	return int64(float64(start) + float64(max-start)/math.Floor(n))
+	return int64(float64(start) + float64(maxT-start)/math.Floor(n))
 }
 
 func (s *memSeries) cutNewHeadChunk(mint int64, e chunkenc.Encoding, chunkRange int64) *memChunk {
diff --git a/tsdb/head_test.go b/tsdb/head_test.go
index f73ce38ba7..0ce60b8494 100644
--- a/tsdb/head_test.go
+++ b/tsdb/head_test.go
@@ -2757,7 +2757,7 @@ func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario) {
 
 	require.Equal(t, int64(math.MinInt64), db.head.minValidTime.Load())
 	require.NoError(t, db.Compact(ctx))
-	require.Greater(t, db.head.minValidTime.Load(), int64(0))
+	require.Positive(t, db.head.minValidTime.Load())
 
 	app = db.Appender(ctx)
 	_, err = appendSample(app, db.head.minValidTime.Load()-2)
@@ -3677,7 +3677,7 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) {
 	require.Len(t, ms.mmappedChunks, 25)
 	expMmapChunks := make([]*mmappedChunk, 0, 20)
 	for _, mmap := range ms.mmappedChunks {
-		require.Greater(t, mmap.numSamples, uint16(0))
+		require.Positive(t, mmap.numSamples)
 		cpy := *mmap
 		expMmapChunks = append(expMmapChunks, &cpy)
 	}
diff --git a/tsdb/index/postingsstats_test.go b/tsdb/index/postingsstats_test.go
index 674e1c0524..82f506bc80 100644
--- a/tsdb/index/postingsstats_test.go
+++ b/tsdb/index/postingsstats_test.go
@@ -20,10 +20,10 @@ import (
 
 func TestPostingsStats(t *testing.T) {
 	stats := &maxHeap{}
-	max := 3000000
-	heapLength := 10
+	const maxCount = 3000000
+	const heapLength = 10
 	stats.init(heapLength)
-	for i := 0; i < max; i++ {
+	for i := 0; i < maxCount; i++ {
 		item := Stat{
 			Name:  "Label-da",
 			Count: uint64(i),
@@ -35,13 +35,13 @@ func TestPostingsStats(t *testing.T) {
 	data := stats.get()
 	require.Len(t, data, 10)
 	for i := 0; i < heapLength; i++ {
-		require.Equal(t, uint64(max-i), data[i].Count)
+		require.Equal(t, uint64(maxCount-i), data[i].Count)
 	}
 }
 
 func TestPostingsStats2(t *testing.T) {
 	stats := &maxHeap{}
-	heapLength := 10
+	const heapLength = 10
 
 	stats.init(heapLength)
 	stats.push(Stat{Name: "Stuff", Count: 10})
@@ -57,12 +57,12 @@ func TestPostingsStats2(t *testing.T) {
 
 func BenchmarkPostingStatsMaxHep(b *testing.B) {
 	stats := &maxHeap{}
-	max := 9000000
-	heapLength := 10
+	const maxCount = 9000000
+	const heapLength = 10
 	b.ResetTimer()
 	for n := 0; n < b.N; n++ {
 		stats.init(heapLength)
-		for i := 0; i < max; i++ {
+		for i := 0; i < maxCount; i++ {
 			item := Stat{
 				Name:  "Label-da",
 				Count: uint64(i),
diff --git a/tsdb/wlog/wlog.go b/tsdb/wlog/wlog.go
index 668fbb5fbc..993e930cef 100644
--- a/tsdb/wlog/wlog.go
+++ b/tsdb/wlog/wlog.go
@@ -612,16 +612,16 @@ func (w *WL) setSegment(segment *Segment) error {
 
 // flushPage writes the new contents of the page to disk. If no more records will fit into
 // the page, the remaining bytes will be set to zero and a new page will be started.
-// If clear is true, this is enforced regardless of how many bytes are left in the page.
-func (w *WL) flushPage(clear bool) error {
+// If forceClear is true, this is enforced regardless of how many bytes are left in the page.
+func (w *WL) flushPage(forceClear bool) error {
 	w.metrics.pageFlushes.Inc()
 
 	p := w.page
-	clear = clear || p.full()
+	shouldClear := forceClear || p.full()
 
 	// No more data will fit into the page or an implicit clear.
 	// Enqueue and clear it.
-	if clear {
+	if shouldClear {
 		p.alloc = pageSize // Write till end of page.
 	}
 
@@ -633,7 +633,7 @@ func (w *WL) flushPage(clear bool) error {
 	p.flushed += n
 
 	// We flushed an entire page, prepare a new one.
-	if clear {
+	if shouldClear {
 		p.reset()
 		w.donePages++
 		w.metrics.pageCompletions.Inc()
diff --git a/util/testutil/directory.go b/util/testutil/directory.go
index 8aa17702d2..38dabd1830 100644
--- a/util/testutil/directory.go
+++ b/util/testutil/directory.go
@@ -155,7 +155,7 @@ func DirHash(t *testing.T, path string) []byte {
 		modTime, err := info.ModTime().GobEncode()
 		require.NoError(t, err)
 
-		_, err = io.WriteString(hash, string(modTime))
+		_, err = hash.Write(modTime)
 		require.NoError(t, err)
 		return nil
 	})
diff --git a/web/web.go b/web/web.go
index 9426ed935a..9fba6fc951 100644
--- a/web/web.go
+++ b/web/web.go
@@ -481,14 +481,14 @@ func New(logger log.Logger, o *Options) *Handler {
 
 	router.Get("/-/healthy", func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusOK)
-		fmt.Fprintf(w, o.AppName+" is Healthy.\n")
+		fmt.Fprint(w, o.AppName+" is Healthy.\n")
 	})
 	router.Head("/-/healthy", func(w http.ResponseWriter, _ *http.Request) {
 		w.WriteHeader(http.StatusOK)
 	})
 	router.Get("/-/ready", readyf(func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusOK)
-		fmt.Fprintf(w, o.AppName+" is Ready.\n")
+		fmt.Fprint(w, o.AppName+" is Ready.\n")
 	}))
 	router.Head("/-/ready", readyf(func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusOK)

From 4023c2405a32686a5fd3779fd310d0307922d445 Mon Sep 17 00:00:00 2001
From: Arve Knudsen <arve.knudsen@gmail.com>
Date: Mon, 19 Aug 2024 10:43:05 +0200
Subject: [PATCH 35/51] Update web/web.go

Co-authored-by: Julien <291750+roidelapluie@users.noreply.github.com>
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
---
 web/web.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/web.go b/web/web.go
index 9fba6fc951..5cc23760a5 100644
--- a/web/web.go
+++ b/web/web.go
@@ -481,7 +481,7 @@ func New(logger log.Logger, o *Options) *Handler {
 
 	router.Get("/-/healthy", func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusOK)
-		fmt.Fprint(w, o.AppName+" is Healthy.\n")
+		fmt.Fprintf(w, "%s is Healthy.\n", o.AppName)
 	})
 	router.Head("/-/healthy", func(w http.ResponseWriter, _ *http.Request) {
 		w.WriteHeader(http.StatusOK)

From ee4715064649514c06cda0b1a267484e2af7a1da Mon Sep 17 00:00:00 2001
From: Arve Knudsen <arve.knudsen@gmail.com>
Date: Mon, 19 Aug 2024 10:43:17 +0200
Subject: [PATCH 36/51] Update web/web.go

Co-authored-by: Julien <291750+roidelapluie@users.noreply.github.com>
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
---
 web/web.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/web.go b/web/web.go
index 5cc23760a5..8e84acd039 100644
--- a/web/web.go
+++ b/web/web.go
@@ -488,7 +488,7 @@ func New(logger log.Logger, o *Options) *Handler {
 	})
 	router.Get("/-/ready", readyf(func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusOK)
-		fmt.Fprint(w, o.AppName+" is Ready.\n")
+		fmt.Fprintf(w, "%s is Ready.\n", o.AppName)
 	}))
 	router.Head("/-/ready", readyf(func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusOK)

From 250aa5031dc5e66ad6b7d1e077461cad64e15b10 Mon Sep 17 00:00:00 2001
From: Arve Knudsen <arve.knudsen@gmail.com>
Date: Mon, 19 Aug 2024 10:50:27 +0200
Subject: [PATCH 37/51] Remove empty line

Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
---
 notifier/notifier.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/notifier/notifier.go b/notifier/notifier.go
index 2ebfbbee59..218e4cb8c7 100644
--- a/notifier/notifier.go
+++ b/notifier/notifier.go
@@ -674,7 +674,6 @@ func (n *Manager) sendOne(ctx context.Context, c *http.Client, url string, b []b
 	}()
 
 	// Any HTTP status 2xx is OK.
-
 	if resp.StatusCode/100 != 2 {
 		return fmt.Errorf("bad response status %s", resp.Status)
 	}

From 88cac6fb49958eb83faf8eb166eeec62fbee49a0 Mon Sep 17 00:00:00 2001
From: machine424 <ayoubmrini424@gmail.com>
Date: Wed, 14 Aug 2024 19:06:49 +0200
Subject: [PATCH 38/51] docs(configuration.md): clarify the explanations about
 some temp labels and format for better visibility.

Signed-off-by: machine424 <ayoubmrini424@gmail.com>
---
 docs/configuration/configuration.md | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md
index 313a7f2f37..1c43f750b1 100644
--- a/docs/configuration/configuration.md
+++ b/docs/configuration/configuration.md
@@ -3265,12 +3265,16 @@ Initially, aside from the configured per-target labels, a target's `job`
 label is set to the `job_name` value of the respective scrape configuration.
 The `__address__` label is set to the `<host>:<port>` address of the target.
 After relabeling, the `instance` label is set to the value of `__address__` by default if
-it was not set during relabeling. The `__scheme__` and `__metrics_path__` labels
-are set to the scheme and metrics path of the target respectively. The `__param_<name>`
-label is set to the value of the first passed URL parameter called `<name>`.
+it was not set during relabeling.
+
+The `__scheme__` and `__metrics_path__` labels
+are set to the scheme and metrics path of the target respectively, as specified in `scrape_config`.
+
+The `__param_<name>`
+label is set to the value of the first passed URL parameter called `<name>`, as defined in `scrape_config`.
 
 The `__scrape_interval__` and `__scrape_timeout__` labels are set to the target's
-interval and timeout.
+interval and timeout, as specified in `scrape_config`.
 
 Additional labels prefixed with `__meta_` may be available during the
 relabeling phase. They are set by the service discovery mechanism that provided

From f9f39a4411645778dcd4a2fffe7d97249b5b212e Mon Sep 17 00:00:00 2001
From: Julien <291750+roidelapluie@users.noreply.github.com>
Date: Tue, 20 Aug 2024 17:09:07 +0200
Subject: [PATCH 39/51] Extend testing CA certificates (#14696)

Signed-off-by: Julien <roidelapluie@o11y.eu>
---
 scrape/testdata/ca.cer  | 18 +++++++++---------
 tracing/testdata/ca.cer | 18 +++++++++---------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/scrape/testdata/ca.cer b/scrape/testdata/ca.cer
index 86f627a903..df93443923 100644
--- a/scrape/testdata/ca.cer
+++ b/scrape/testdata/ca.cer
@@ -1,8 +1,8 @@
 -----BEGIN CERTIFICATE-----
 MIIDkTCCAnmgAwIBAgIJAJNsnimNN3tmMA0GCSqGSIb3DQEBCwUAMF8xCzAJBgNV
 BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg
-Q29tcGFueSBMdGQxGzAZBgNVBAMMElByb21ldGhldXMgVGVzdCBDQTAeFw0xNTA4
-MDQxNDA5MjFaFw0yNTA4MDExNDA5MjFaMF8xCzAJBgNVBAYTAlhYMRUwEwYDVQQH
+Q29tcGFueSBMdGQxGzAZBgNVBAMMElByb21ldGhldXMgVGVzdCBDQTAeFw0yNDA4
+MjAxMTUxMjNaFw00NDEyMDUxMTUxMjNaMF8xCzAJBgNVBAYTAlhYMRUwEwYDVQQH
 DAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQgQ29tcGFueSBMdGQxGzAZ
 BgNVBAMMElByb21ldGhldXMgVGVzdCBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEP
 ADCCAQoCggEBAOlSBU3yWpUELbhzizznR0hnAL7dbEHzfEtEc6N3PoSvMNcqrUVq
@@ -12,11 +12,11 @@ yB9M1ypWomzBz1UFXZp1oiNO5o7/dgXW4MgLUfC2obJ9j5xqpc6GkhWMW4ZFwEr/
 VLjuzxG9B8tLfQuhnXKGn1W8+WzZVWCWMD/sLfZfmjKaWlwcXzL51g8E+IEIBJqV
 w51aMI6lDkcvAM7gLq1auLZMVXyKWSKw7XMCAwEAAaNQME4wHQYDVR0OBBYEFMz1
 BZnlqxJp2HiJSjHK8IsLrWYbMB8GA1UdIwQYMBaAFMz1BZnlqxJp2HiJSjHK8IsL
-rWYbMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAI2iA3w3TK5J15Pu
-e4fPFB4jxQqsbUwuyXbCCv/jKLeFNCD4BjM181WZEYjPMumeTBVzU3aF45LWQIG1
-0DJcrCL4mjMz9qgAoGqA7aDDXiJGbukMgYYsn7vrnVmrZH8T3E8ySlltr7+W578k
-pJ5FxnbCroQwn0zLyVB3sFbS8E3vpBr3L8oy8PwPHhIScexcNVc3V6/m4vTZsXTH
-U+vUm1XhDgpDcFMTg2QQiJbfpOYUkwIgnRDAT7t282t2KQWtnlqc3zwPQ1F/6Cpx
-j19JeNsaF1DArkD7YlyKj/GhZLtHwFHG5cxznH0mLDJTW7bQvqqh2iQTeXmBk1lU
-mM5lH/s=
+rWYbMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAEqhsLzIh098lmLl
+CSmuOi5o0NLFaO3qgzaxhvO56RkrtnMQb99/u/F2gQPBoVrubES4fBDRWtfBkmRZ
+NabgqghBN27nyLa9DEtHeOzEtBWjYnZKOY5uGf/wwIp+HM2H5QBs8c4nJv+46ev3
+L73CS5zWV950dLNPA5iatQgtFsp/tsh2YoYbfPI+bHjMLJWau3cl6ID/m+j4moU7
+hbcXTnehz0250CXoXYzmfPHZUjA97Cs3kbzi6Dkxbz3pmHCAfEHdGRMFIZR7Fs/Y
+7k44NF5q/82FrI+Umt1OdwUTprSAUrKXZHaI9N1CClAcgP1LbqliEKrvLsEvvg7C
+LrUoX4M=
 -----END CERTIFICATE-----
diff --git a/tracing/testdata/ca.cer b/tracing/testdata/ca.cer
index 86f627a903..df93443923 100644
--- a/tracing/testdata/ca.cer
+++ b/tracing/testdata/ca.cer
@@ -1,8 +1,8 @@
 -----BEGIN CERTIFICATE-----
 MIIDkTCCAnmgAwIBAgIJAJNsnimNN3tmMA0GCSqGSIb3DQEBCwUAMF8xCzAJBgNV
 BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg
-Q29tcGFueSBMdGQxGzAZBgNVBAMMElByb21ldGhldXMgVGVzdCBDQTAeFw0xNTA4
-MDQxNDA5MjFaFw0yNTA4MDExNDA5MjFaMF8xCzAJBgNVBAYTAlhYMRUwEwYDVQQH
+Q29tcGFueSBMdGQxGzAZBgNVBAMMElByb21ldGhldXMgVGVzdCBDQTAeFw0yNDA4
+MjAxMTUxMjNaFw00NDEyMDUxMTUxMjNaMF8xCzAJBgNVBAYTAlhYMRUwEwYDVQQH
 DAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQgQ29tcGFueSBMdGQxGzAZ
 BgNVBAMMElByb21ldGhldXMgVGVzdCBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEP
 ADCCAQoCggEBAOlSBU3yWpUELbhzizznR0hnAL7dbEHzfEtEc6N3PoSvMNcqrUVq
@@ -12,11 +12,11 @@ yB9M1ypWomzBz1UFXZp1oiNO5o7/dgXW4MgLUfC2obJ9j5xqpc6GkhWMW4ZFwEr/
 VLjuzxG9B8tLfQuhnXKGn1W8+WzZVWCWMD/sLfZfmjKaWlwcXzL51g8E+IEIBJqV
 w51aMI6lDkcvAM7gLq1auLZMVXyKWSKw7XMCAwEAAaNQME4wHQYDVR0OBBYEFMz1
 BZnlqxJp2HiJSjHK8IsLrWYbMB8GA1UdIwQYMBaAFMz1BZnlqxJp2HiJSjHK8IsL
-rWYbMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAI2iA3w3TK5J15Pu
-e4fPFB4jxQqsbUwuyXbCCv/jKLeFNCD4BjM181WZEYjPMumeTBVzU3aF45LWQIG1
-0DJcrCL4mjMz9qgAoGqA7aDDXiJGbukMgYYsn7vrnVmrZH8T3E8ySlltr7+W578k
-pJ5FxnbCroQwn0zLyVB3sFbS8E3vpBr3L8oy8PwPHhIScexcNVc3V6/m4vTZsXTH
-U+vUm1XhDgpDcFMTg2QQiJbfpOYUkwIgnRDAT7t282t2KQWtnlqc3zwPQ1F/6Cpx
-j19JeNsaF1DArkD7YlyKj/GhZLtHwFHG5cxznH0mLDJTW7bQvqqh2iQTeXmBk1lU
-mM5lH/s=
+rWYbMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAEqhsLzIh098lmLl
+CSmuOi5o0NLFaO3qgzaxhvO56RkrtnMQb99/u/F2gQPBoVrubES4fBDRWtfBkmRZ
+NabgqghBN27nyLa9DEtHeOzEtBWjYnZKOY5uGf/wwIp+HM2H5QBs8c4nJv+46ev3
+L73CS5zWV950dLNPA5iatQgtFsp/tsh2YoYbfPI+bHjMLJWau3cl6ID/m+j4moU7
+hbcXTnehz0250CXoXYzmfPHZUjA97Cs3kbzi6Dkxbz3pmHCAfEHdGRMFIZR7Fs/Y
+7k44NF5q/82FrI+Umt1OdwUTprSAUrKXZHaI9N1CClAcgP1LbqliEKrvLsEvvg7C
+LrUoX4M=
 -----END CERTIFICATE-----

From 1435c8ae4aa1041592778018ba62fc3058a9ad3d Mon Sep 17 00:00:00 2001
From: Ben Kochie <superq@gmail.com>
Date: Tue, 20 Aug 2024 19:41:02 +0200
Subject: [PATCH 40/51] Include test CA text info (#14699)

Use `openssl x509 -text -in <file>` to include the text version of the
certificate in order to make it easier to see the diff when applying
changes to the cert.

Signed-off-by: SuperQ <superq@gmail.com>
---
 scrape/testdata/ca.cer  | 58 +++++++++++++++++++++++++++++++++++++++++
 tracing/testdata/ca.cer | 58 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 116 insertions(+)

diff --git a/scrape/testdata/ca.cer b/scrape/testdata/ca.cer
index df93443923..dbbd009d4a 100644
--- a/scrape/testdata/ca.cer
+++ b/scrape/testdata/ca.cer
@@ -1,3 +1,61 @@
+Certificate:
+    Data:
+        Version: 3 (0x2)
+        Serial Number:
+            93:6c:9e:29:8d:37:7b:66
+        Signature Algorithm: sha256WithRSAEncryption
+        Issuer: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA
+        Validity
+            Not Before: Aug 20 11:51:23 2024 GMT
+            Not After : Dec  5 11:51:23 2044 GMT
+        Subject: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA
+        Subject Public Key Info:
+            Public Key Algorithm: rsaEncryption
+                Public-Key: (2048 bit)
+                Modulus:
+                    00:e9:52:05:4d:f2:5a:95:04:2d:b8:73:8b:3c:e7:
+                    47:48:67:00:be:dd:6c:41:f3:7c:4b:44:73:a3:77:
+                    3e:84:af:30:d7:2a:ad:45:6a:b7:89:23:05:15:b3:
+                    aa:46:79:b8:95:64:cc:13:c4:44:a1:01:a0:e2:3d:
+                    a5:67:2b:aa:d3:13:06:43:33:1c:96:36:12:9e:c6:
+                    1d:36:9b:d7:47:bd:28:2d:88:15:04:fa:14:a3:ff:
+                    8c:26:22:c5:a2:15:c7:76:b3:11:f6:a3:44:9a:28:
+                    0f:ca:fb:f4:51:a8:6a:05:94:7c:77:47:c8:21:56:
+                    25:bf:e2:2e:df:33:f3:e4:bd:d6:47:a5:49:13:12:
+                    c8:1f:4c:d7:2a:56:a2:6c:c1:cf:55:05:5d:9a:75:
+                    a2:23:4e:e6:8e:ff:76:05:d6:e0:c8:0b:51:f0:b6:
+                    a1:b2:7d:8f:9c:6a:a5:ce:86:92:15:8c:5b:86:45:
+                    c0:4a:ff:54:b8:ee:cf:11:bd:07:cb:4b:7d:0b:a1:
+                    9d:72:86:9f:55:bc:f9:6c:d9:55:60:96:30:3f:ec:
+                    2d:f6:5f:9a:32:9a:5a:5c:1c:5f:32:f9:d6:0f:04:
+                    f8:81:08:04:9a:95:c3:9d:5a:30:8e:a5:0e:47:2f:
+                    00:ce:e0:2e:ad:5a:b8:b6:4c:55:7c:8a:59:22:b0:
+                    ed:73
+                Exponent: 65537 (0x10001)
+        X509v3 extensions:
+            X509v3 Subject Key Identifier: 
+                CC:F5:05:99:E5:AB:12:69:D8:78:89:4A:31:CA:F0:8B:0B:AD:66:1B
+            X509v3 Authority Key Identifier: 
+                CC:F5:05:99:E5:AB:12:69:D8:78:89:4A:31:CA:F0:8B:0B:AD:66:1B
+            X509v3 Basic Constraints: 
+                CA:TRUE
+    Signature Algorithm: sha256WithRSAEncryption
+    Signature Value:
+        4a:a1:b0:bc:c8:87:4f:7c:96:62:e5:09:29:ae:3a:2e:68:d0:
+        d2:c5:68:ed:ea:83:36:b1:86:f3:b9:e9:19:2b:b6:73:10:6f:
+        df:7f:bb:f1:76:81:03:c1:a1:5a:ee:6c:44:b8:7c:10:d1:5a:
+        d7:c1:92:64:59:35:a6:e0:aa:08:41:37:6e:e7:c8:b6:bd:0c:
+        4b:47:78:ec:c4:b4:15:a3:62:76:4a:39:8e:6e:19:ff:f0:c0:
+        8a:7e:1c:cd:87:e5:00:6c:f1:ce:27:26:ff:b8:e9:eb:f7:2f:
+        bd:c2:4b:9c:d6:57:de:74:74:b3:4f:03:98:9a:b5:08:2d:16:
+        ca:7f:b6:c8:76:62:86:1b:7c:f2:3e:6c:78:cc:2c:95:9a:bb:
+        77:25:e8:80:ff:9b:e8:f8:9a:85:3b:85:b7:17:4e:77:a1:cf:
+        4d:b9:d0:25:e8:5d:8c:e6:7c:f1:d9:52:30:3d:ec:2b:37:91:
+        bc:e2:e8:39:31:6f:3d:e9:98:70:80:7c:41:dd:19:13:05:21:
+        94:7b:16:cf:d8:ee:4e:38:34:5e:6a:ff:cd:85:ac:8f:94:9a:
+        dd:4e:77:05:13:a6:b4:80:52:b2:97:64:76:88:f4:dd:42:0a:
+        50:1c:80:fd:4b:6e:a9:62:10:aa:ef:2e:c1:2f:be:0e:c2:2e:
+        b5:28:5f:83
 -----BEGIN CERTIFICATE-----
 MIIDkTCCAnmgAwIBAgIJAJNsnimNN3tmMA0GCSqGSIb3DQEBCwUAMF8xCzAJBgNV
 BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg
diff --git a/tracing/testdata/ca.cer b/tracing/testdata/ca.cer
index df93443923..dbbd009d4a 100644
--- a/tracing/testdata/ca.cer
+++ b/tracing/testdata/ca.cer
@@ -1,3 +1,61 @@
+Certificate:
+    Data:
+        Version: 3 (0x2)
+        Serial Number:
+            93:6c:9e:29:8d:37:7b:66
+        Signature Algorithm: sha256WithRSAEncryption
+        Issuer: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA
+        Validity
+            Not Before: Aug 20 11:51:23 2024 GMT
+            Not After : Dec  5 11:51:23 2044 GMT
+        Subject: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA
+        Subject Public Key Info:
+            Public Key Algorithm: rsaEncryption
+                Public-Key: (2048 bit)
+                Modulus:
+                    00:e9:52:05:4d:f2:5a:95:04:2d:b8:73:8b:3c:e7:
+                    47:48:67:00:be:dd:6c:41:f3:7c:4b:44:73:a3:77:
+                    3e:84:af:30:d7:2a:ad:45:6a:b7:89:23:05:15:b3:
+                    aa:46:79:b8:95:64:cc:13:c4:44:a1:01:a0:e2:3d:
+                    a5:67:2b:aa:d3:13:06:43:33:1c:96:36:12:9e:c6:
+                    1d:36:9b:d7:47:bd:28:2d:88:15:04:fa:14:a3:ff:
+                    8c:26:22:c5:a2:15:c7:76:b3:11:f6:a3:44:9a:28:
+                    0f:ca:fb:f4:51:a8:6a:05:94:7c:77:47:c8:21:56:
+                    25:bf:e2:2e:df:33:f3:e4:bd:d6:47:a5:49:13:12:
+                    c8:1f:4c:d7:2a:56:a2:6c:c1:cf:55:05:5d:9a:75:
+                    a2:23:4e:e6:8e:ff:76:05:d6:e0:c8:0b:51:f0:b6:
+                    a1:b2:7d:8f:9c:6a:a5:ce:86:92:15:8c:5b:86:45:
+                    c0:4a:ff:54:b8:ee:cf:11:bd:07:cb:4b:7d:0b:a1:
+                    9d:72:86:9f:55:bc:f9:6c:d9:55:60:96:30:3f:ec:
+                    2d:f6:5f:9a:32:9a:5a:5c:1c:5f:32:f9:d6:0f:04:
+                    f8:81:08:04:9a:95:c3:9d:5a:30:8e:a5:0e:47:2f:
+                    00:ce:e0:2e:ad:5a:b8:b6:4c:55:7c:8a:59:22:b0:
+                    ed:73
+                Exponent: 65537 (0x10001)
+        X509v3 extensions:
+            X509v3 Subject Key Identifier: 
+                CC:F5:05:99:E5:AB:12:69:D8:78:89:4A:31:CA:F0:8B:0B:AD:66:1B
+            X509v3 Authority Key Identifier: 
+                CC:F5:05:99:E5:AB:12:69:D8:78:89:4A:31:CA:F0:8B:0B:AD:66:1B
+            X509v3 Basic Constraints: 
+                CA:TRUE
+    Signature Algorithm: sha256WithRSAEncryption
+    Signature Value:
+        4a:a1:b0:bc:c8:87:4f:7c:96:62:e5:09:29:ae:3a:2e:68:d0:
+        d2:c5:68:ed:ea:83:36:b1:86:f3:b9:e9:19:2b:b6:73:10:6f:
+        df:7f:bb:f1:76:81:03:c1:a1:5a:ee:6c:44:b8:7c:10:d1:5a:
+        d7:c1:92:64:59:35:a6:e0:aa:08:41:37:6e:e7:c8:b6:bd:0c:
+        4b:47:78:ec:c4:b4:15:a3:62:76:4a:39:8e:6e:19:ff:f0:c0:
+        8a:7e:1c:cd:87:e5:00:6c:f1:ce:27:26:ff:b8:e9:eb:f7:2f:
+        bd:c2:4b:9c:d6:57:de:74:74:b3:4f:03:98:9a:b5:08:2d:16:
+        ca:7f:b6:c8:76:62:86:1b:7c:f2:3e:6c:78:cc:2c:95:9a:bb:
+        77:25:e8:80:ff:9b:e8:f8:9a:85:3b:85:b7:17:4e:77:a1:cf:
+        4d:b9:d0:25:e8:5d:8c:e6:7c:f1:d9:52:30:3d:ec:2b:37:91:
+        bc:e2:e8:39:31:6f:3d:e9:98:70:80:7c:41:dd:19:13:05:21:
+        94:7b:16:cf:d8:ee:4e:38:34:5e:6a:ff:cd:85:ac:8f:94:9a:
+        dd:4e:77:05:13:a6:b4:80:52:b2:97:64:76:88:f4:dd:42:0a:
+        50:1c:80:fd:4b:6e:a9:62:10:aa:ef:2e:c1:2f:be:0e:c2:2e:
+        b5:28:5f:83
 -----BEGIN CERTIFICATE-----
 MIIDkTCCAnmgAwIBAgIJAJNsnimNN3tmMA0GCSqGSIb3DQEBCwUAMF8xCzAJBgNV
 BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg

From 8e82ac8d2bbee98c1f835fe715d2ea2ad4aa362a Mon Sep 17 00:00:00 2001
From: suntala <arati.rana@grafana.com>
Date: Tue, 20 Aug 2024 20:40:55 +0200
Subject: [PATCH 41/51] Add comments to the sort by label functions

Signed-off-by: suntala <arati.rana@grafana.com>
---
 promql/functions.go | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/promql/functions.go b/promql/functions.go
index e9bfe45f4a..189d67caf5 100644
--- a/promql/functions.go
+++ b/promql/functions.go
@@ -406,17 +406,22 @@ func funcSortDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel
 
 // === sort_by_label(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) ===
 func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
+	// First, sort by the full label set. This ensures a consistent ordering in case sorting by the
+	// labels provided as arguments is not conclusive.
 	slices.SortFunc(vals[0].(Vector), func(a, b Sample) int {
 		return labels.Compare(a.Metric, b.Metric)
 	})
 
 	labels := stringSliceFromArgs(args[1:])
+	// Next, sort by the labels provided as arguments.
 	slices.SortFunc(vals[0].(Vector), func(a, b Sample) int {
-		// Iterate over each given label
+		// Iterate over each given label.
 		for _, label := range labels {
 			lv1 := a.Metric.Get(label)
 			lv2 := b.Metric.Get(label)
 
+			// If we encounter multiple samples with the same label values, the sorting which was
+			// performed in the first step will act as a "tie breaker".
 			if lv1 == lv2 {
 				continue
 			}
@@ -436,17 +441,22 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode
 
 // === sort_by_label_desc(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) ===
 func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
+	// First, sort by the full label set. This ensures a consistent ordering in case sorting by the
+	// labels provided as arguments is not conclusive.
 	slices.SortFunc(vals[0].(Vector), func(a, b Sample) int {
 		return labels.Compare(b.Metric, a.Metric)
 	})
 
 	labels := stringSliceFromArgs(args[1:])
+	// Next, sort by the labels provided as arguments.
 	slices.SortFunc(vals[0].(Vector), func(a, b Sample) int {
-		// Iterate over each given label
+		// Iterate over each given label.
 		for _, label := range labels {
 			lv1 := a.Metric.Get(label)
 			lv2 := b.Metric.Get(label)
 
+			// If we encounter multiple samples with the same label values, the sorting which was
+			// performed in the first step will act as a "tie breaker".
 			if lv1 == lv2 {
 				continue
 			}

From 21106611217feefa32bb767f938be73c946700e5 Mon Sep 17 00:00:00 2001
From: cuishuang <imcusg@gmail.com>
Date: Wed, 21 Aug 2024 18:15:25 +0800
Subject: [PATCH 42/51] fix: fix slice init length

Signed-off-by: cuishuang <imcusg@gmail.com>
---
 storage/remote/queue_manager_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go
index 1c06173a59..032a1a92f7 100644
--- a/storage/remote/queue_manager_test.go
+++ b/storage/remote/queue_manager_test.go
@@ -930,7 +930,7 @@ func createHistograms(numSamples, numSeries int, floatHistogram bool) ([]record.
 }
 
 func createSeriesMetadata(series []record.RefSeries) []record.RefMetadata {
-	metas := make([]record.RefMetadata, len(series))
+	metas := make([]record.RefMetadata, 0, len(series))
 
 	for _, s := range series {
 		metas = append(metas, record.RefMetadata{

From 9a74d53935db7fc847bb87e00d61ca112dff409e Mon Sep 17 00:00:00 2001
From: Bryan Boreham <bjboreham@gmail.com>
Date: Wed, 21 Aug 2024 14:24:20 +0100
Subject: [PATCH 43/51] [BUGFIX] TSDB: Fix query overlapping in-order and ooo
 head  (#14693)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* tsdb: Unit test query overlapping in-order and ooo head

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>

* TSDB: Merge overlapping head chunk

The basic idea is that getOOOSeriesChunks can populate Meta.Chunk, but since
it only returns one Meta per overlapping time-slot, that pointer may end up in a
Meta with a head-chunk ID. So we need HeadAndOOOChunkReader.ChunkOrIterable()
to call mergedChunks in that case.

Previously, mergedChunks was checking that meta.Ref was a valid OOO chunk reference,
but it never actually uses that reference; it just finds all chunks overlapping in time.
So we can delete that code.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>

Co-authored-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
---
 tsdb/db_test.go       | 34 ++++++++++++++++++++++++++++------
 tsdb/head_read.go     | 23 ++---------------------
 tsdb/ooo_head_read.go |  6 +++++-
 3 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/tsdb/db_test.go b/tsdb/db_test.go
index 904fdeffcb..6d266e8724 100644
--- a/tsdb/db_test.go
+++ b/tsdb/db_test.go
@@ -5042,10 +5042,16 @@ func Test_Querier_OOOQuery(t *testing.T) {
 	series1 := labels.FromStrings("foo", "bar1")
 
 	minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() }
-	addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample) ([]chunks.Sample, int) {
+	addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter func(int64) bool) ([]chunks.Sample, int) {
+		if filter == nil {
+			filter = func(int64) bool { return true }
+		}
 		app := db.Appender(context.Background())
 		totalAppended := 0
 		for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() {
+			if !filter(m / time.Minute.Milliseconds()) {
+				continue
+			}
 			_, err := app.Append(0, series1, m, float64(m))
 			if m >= queryMinT && m <= queryMaxT {
 				expSamples = append(expSamples, sample{t: m, f: float64(m)})
@@ -5084,6 +5090,15 @@ func Test_Querier_OOOQuery(t *testing.T) {
 			oooMinT:     minutes(0),
 			oooMaxT:     minutes(99),
 		},
+		{
+			name:        "query overlapping inorder and ooo samples returns all ingested samples",
+			queryMinT:   minutes(0),
+			queryMaxT:   minutes(200),
+			inOrderMinT: minutes(100),
+			inOrderMaxT: minutes(200),
+			oooMinT:     minutes(180 - opts.OutOfOrderCapMax/2), // Make sure to fit into the OOO head.
+			oooMaxT:     minutes(180),
+		},
 	}
 	for _, tc := range tests {
 		t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) {
@@ -5093,13 +5108,20 @@ func Test_Querier_OOOQuery(t *testing.T) {
 				require.NoError(t, db.Close())
 			}()
 
-			var expSamples []chunks.Sample
+			var (
+				expSamples []chunks.Sample
+				inoSamples int
+			)
 
-			// Add in-order samples.
-			expSamples, _ = addSample(db, tc.inOrderMinT, tc.inOrderMaxT, tc.queryMinT, tc.queryMaxT, expSamples)
+			// Add in-order samples (at even minutes).
+			expSamples, inoSamples = addSample(db, tc.inOrderMinT, tc.inOrderMaxT, tc.queryMinT, tc.queryMaxT, expSamples, func(t int64) bool { return t%2 == 0 })
+			// Sanity check that filter is not too zealous.
+			require.Positive(t, inoSamples, 0)
 
-			// Add out-of-order samples.
-			expSamples, oooSamples := addSample(db, tc.oooMinT, tc.oooMaxT, tc.queryMinT, tc.queryMaxT, expSamples)
+			// Add out-of-order samples (at odd minutes).
+			expSamples, oooSamples := addSample(db, tc.oooMinT, tc.oooMaxT, tc.queryMinT, tc.queryMaxT, expSamples, func(t int64) bool { return t%2 == 1 })
+			// Sanity check that filter is not too zealous.
+			require.Positive(t, oooSamples, 0)
 
 			sort.Slice(expSamples, func(i, j int) bool {
 				return expSamples[i].T() < expSamples[j].T()
diff --git a/tsdb/head_read.go b/tsdb/head_read.go
index 47f12df994..5e3a76273a 100644
--- a/tsdb/head_read.go
+++ b/tsdb/head_read.go
@@ -481,31 +481,12 @@ func (s *memSeries) chunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDi
 	return elem, true, offset == 0, nil
 }
 
-// mergedChunks return an iterable over one or more OOO chunks for the given
-// chunks.Meta reference from memory or by m-mapping it from the disk. The
-// returned iterable will be a merge of all the overlapping chunks, if any,
-// amongst all the chunks in the OOOHead.
+// mergedChunks returns an iterable over all chunks that overlap the
+// time window [mint,maxt], plus meta.Chunk if populated.
 // If hr is non-nil then in-order chunks are included.
 // This function is not thread safe unless the caller holds a lock.
 // The caller must ensure that s.ooo is not nil.
 func (s *memSeries) mergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMapper, hr *headChunkReader, mint, maxt int64, maxMmapRef chunks.ChunkDiskMapperRef) (chunkenc.Iterable, error) {
-	_, cid, _ := unpackHeadChunkRef(meta.Ref)
-
-	// ix represents the index of chunk in the s.mmappedChunks slice. The chunk meta's are
-	// incremented by 1 when new chunk is created, hence (meta - firstChunkID) gives the slice index.
-	// The max index for the s.mmappedChunks slice can be len(s.mmappedChunks)-1, hence if the ix
-	// is len(s.mmappedChunks), it represents the next chunk, which is the head chunk.
-	ix := int(cid) - int(s.ooo.firstOOOChunkID)
-	if ix < 0 || ix > len(s.ooo.oooMmappedChunks) {
-		return nil, storage.ErrNotFound
-	}
-
-	if ix == len(s.ooo.oooMmappedChunks) {
-		if s.ooo.oooHeadChunk == nil {
-			return nil, errors.New("invalid ooo head chunk")
-		}
-	}
-
 	// We create a temporary slice of chunk metas to hold the information of all
 	// possible chunks that may overlap with the requested chunk.
 	tmpChks := make([]chunkMetaAndChunkDiskMapperRef, 0, len(s.ooo.oooMmappedChunks)+1)
diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index 55e241fd90..cc98df4729 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -242,7 +242,7 @@ func NewHeadAndOOOChunkReader(head *Head, mint, maxt int64, cr *headChunkReader,
 
 func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error) {
 	sid, _, isOOO := unpackHeadChunkRef(meta.Ref)
-	if !isOOO {
+	if !isOOO && meta.Chunk == nil { // meta.Chunk can have a copy of OOO head samples, even on non-OOO chunk ID.
 		return cr.cr.ChunkOrIterable(meta)
 	}
 
@@ -253,6 +253,10 @@ func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chu
 	}
 
 	s.Lock()
+	if s.ooo == nil { // Must have s.ooo non-nil to call mergedChunks().
+		s.Unlock()
+		return cr.cr.ChunkOrIterable(meta)
+	}
 	mc, err := s.mergedChunks(meta, cr.head.chunkDiskMapper, cr.cr, cr.mint, cr.maxt, cr.maxMmapRef)
 	s.Unlock()
 

From b50c5d42feb66dfb74d70fcdc0eac9d1a15006e2 Mon Sep 17 00:00:00 2001
From: Arve Knudsen <arve.knudsen@gmail.com>
Date: Wed, 21 Aug 2024 15:22:38 +0200
Subject: [PATCH 44/51] OTLP receiver: Warn when encountering invalid
 exponential histograms

Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
---
 CHANGELOG.md                                  |  1 +
 .../prometheusremotewrite/histograms.go       | 21 ++++++----
 .../prometheusremotewrite/metrics_to_prw.go   | 11 ++++--
 .../metrics_to_prw_test.go                    | 39 ++++++++++++++++++-
 storage/remote/write_handler.go               |  9 ++++-
 5 files changed, 67 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0036a4a762..6fa8410c98 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## unreleased
 
 * [FEATURE] OTLP receiver: Add new option `otlp.promote_resource_attributes`, for any OTel resource attributes that should be promoted to metric labels. #14200
+* [ENHANCEMENT] OTLP receiver: Warn when encountering exponential histograms with zero count and non-zero sum. #14706
 * [BUGFIX] tsdb/wlog.Watcher.readSegmentForGC: Only count unknown record types against record_decode_failures_total metric. #14042
 
 ## 2.54.0-rc.1 / 2024-08-05
diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go
index 73528019d8..ec93387fc6 100644
--- a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go
@@ -26,6 +26,7 @@ import (
 
 	"github.com/prometheus/prometheus/model/value"
 	"github.com/prometheus/prometheus/prompb"
+	"github.com/prometheus/prometheus/util/annotations"
 )
 
 const defaultZeroThreshold = 1e-128
@@ -33,13 +34,15 @@ const defaultZeroThreshold = 1e-128
 // addExponentialHistogramDataPoints adds OTel exponential histogram data points to the corresponding time series
 // as native histogram samples.
 func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice,
-	resource pcommon.Resource, settings Settings, promName string) error {
+	resource pcommon.Resource, settings Settings, promName string) (annotations.Annotations, error) {
+	var annots annotations.Annotations
 	for x := 0; x < dataPoints.Len(); x++ {
 		pt := dataPoints.At(x)
 
-		histogram, err := exponentialToNativeHistogram(pt)
+		histogram, ws, err := exponentialToNativeHistogram(pt)
+		annots.Merge(ws)
 		if err != nil {
-			return err
+			return annots, err
 		}
 
 		lbls := createAttributes(
@@ -58,15 +61,16 @@ func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetr
 		ts.Exemplars = append(ts.Exemplars, exemplars...)
 	}
 
-	return nil
+	return annots, nil
 }
 
 // exponentialToNativeHistogram translates OTel Exponential Histogram data point
 // to Prometheus Native Histogram.
-func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, error) {
+func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, annotations.Annotations, error) {
+	var annots annotations.Annotations
 	scale := p.Scale()
 	if scale < -4 {
-		return prompb.Histogram{},
+		return prompb.Histogram{}, annots,
 			fmt.Errorf("cannot convert exponential to native histogram."+
 				" Scale must be >= -4, was %d", scale)
 	}
@@ -114,8 +118,11 @@ func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prom
 			h.Sum = p.Sum()
 		}
 		h.Count = &prompb.Histogram_CountInt{CountInt: p.Count()}
+		if p.Count() == 0 && h.Sum != 0 {
+			annots.Add(fmt.Errorf("exponential histogram data point has zero count, but non-zero sum: %f", h.Sum))
+		}
 	}
-	return h, nil
+	return h, annots, nil
 }
 
 // convertBucketsLayout translates OTel Exponential Histogram dense buckets
diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go
index a3a7897232..9d76800809 100644
--- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go
@@ -27,6 +27,7 @@ import (
 
 	"github.com/prometheus/prometheus/prompb"
 	prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus"
+	"github.com/prometheus/prometheus/util/annotations"
 )
 
 type Settings struct {
@@ -53,7 +54,7 @@ func NewPrometheusConverter() *PrometheusConverter {
 }
 
 // FromMetrics converts pmetric.Metrics to Prometheus remote write format.
-func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (errs error) {
+func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (annots annotations.Annotations, errs error) {
 	resourceMetricsSlice := md.ResourceMetrics()
 	for i := 0; i < resourceMetricsSlice.Len(); i++ {
 		resourceMetrics := resourceMetricsSlice.At(i)
@@ -107,12 +108,14 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings)
 						errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name()))
 						break
 					}
-					errs = multierr.Append(errs, c.addExponentialHistogramDataPoints(
+					ws, err := c.addExponentialHistogramDataPoints(
 						dataPoints,
 						resource,
 						settings,
 						promName,
-					))
+					)
+					annots.Merge(ws)
+					errs = multierr.Append(errs, err)
 				case pmetric.MetricTypeSummary:
 					dataPoints := metric.Summary().DataPoints()
 					if dataPoints.Len() == 0 {
@@ -128,7 +131,7 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings)
 		addResourceTargetInfo(resource, settings, mostRecentTimestamp, c)
 	}
 
-	return
+	return annots, errs
 }
 
 func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool {
diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go
index 37ac677747..bdc1c9d0b2 100644
--- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go
@@ -27,6 +27,41 @@ import (
 	"go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp"
 )
 
+func TestFromMetrics(t *testing.T) {
+	t.Run("exponential histogram warnings for zero count and non-zero sum", func(t *testing.T) {
+		request := pmetricotlp.NewExportRequest()
+		rm := request.Metrics().ResourceMetrics().AppendEmpty()
+		generateAttributes(rm.Resource().Attributes(), "resource", 10)
+
+		metrics := rm.ScopeMetrics().AppendEmpty().Metrics()
+		ts := pcommon.NewTimestampFromTime(time.Now())
+
+		for i := 1; i <= 10; i++ {
+			m := metrics.AppendEmpty()
+			m.SetEmptyExponentialHistogram()
+			m.SetName(fmt.Sprintf("histogram-%d", i))
+			m.ExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
+			h := m.ExponentialHistogram().DataPoints().AppendEmpty()
+			h.SetTimestamp(ts)
+
+			h.SetCount(0)
+			h.SetSum(155)
+
+			generateAttributes(h.Attributes(), "series", 10)
+		}
+
+		converter := NewPrometheusConverter()
+		annots, err := converter.FromMetrics(request.Metrics(), Settings{})
+		require.NoError(t, err)
+		require.NotEmpty(t, annots)
+		ws, infos := annots.AsStrings("", 0, 0)
+		require.Empty(t, infos)
+		require.Equal(t, []string{
+			"exponential histogram data point has zero count, but non-zero sum: 155.000000",
+		}, ws)
+	})
+}
+
 func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) {
 	for _, resourceAttributeCount := range []int{0, 5, 50} {
 		b.Run(fmt.Sprintf("resource attribute count: %v", resourceAttributeCount), func(b *testing.B) {
@@ -49,7 +84,9 @@ func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) {
 
 											for i := 0; i < b.N; i++ {
 												converter := NewPrometheusConverter()
-												require.NoError(b, converter.FromMetrics(payload.Metrics(), Settings{}))
+												annots, err := converter.FromMetrics(payload.Metrics(), Settings{})
+												require.NoError(b, err)
+												require.Empty(b, annots)
 												require.NotNil(b, converter.TimeSeries())
 											}
 										})
diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go
index aba79a561d..8ac7590465 100644
--- a/storage/remote/write_handler.go
+++ b/storage/remote/write_handler.go
@@ -502,12 +502,17 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 	otlpCfg := h.configFunc().OTLPConfig
 
 	converter := otlptranslator.NewPrometheusConverter()
-	if err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{
+	annots, err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{
 		AddMetricSuffixes:         true,
 		PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes,
-	}); err != nil {
+	})
+	if err != nil {
 		level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err)
 	}
+	ws, _ := annots.AsStrings("", 0, 0)
+	if len(ws) > 0 {
+		level.Warn(h.logger).Log("msg", "Warnings translating OTLP metrics to Prometheus write request", "warnings", ws)
+	}
 
 	err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{
 		Timeseries: converter.TimeSeries(),

From fbcd50f32c7a5f8f040cb4c9a4335f821a263f18 Mon Sep 17 00:00:00 2001
From: Arve Knudsen <arve.knudsen@gmail.com>
Date: Wed, 21 Aug 2024 18:55:28 +0200
Subject: [PATCH 45/51] Upgrade golangci-lint to v1.60.2

Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
---
 .github/workflows/ci.yml  | 2 +-
 Makefile.common           | 2 +-
 scripts/golangci-lint.yml | 2 +-
 template/template.go      | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c3a1d68e98..c89c9507bb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -186,7 +186,7 @@ jobs:
         with:
           args: --verbose
           # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml.
-          version: v1.60.1
+          version: v1.60.2
   fuzzing:
     uses: ./.github/workflows/fuzzing.yml
     if: github.event_name == 'pull_request'
diff --git a/Makefile.common b/Makefile.common
index 2ecd5465c3..34d65bb56d 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -61,7 +61,7 @@ PROMU_URL     := https://github.com/prometheus/promu/releases/download/v$(PROMU_
 SKIP_GOLANGCI_LINT :=
 GOLANGCI_LINT :=
 GOLANGCI_LINT_OPTS ?=
-GOLANGCI_LINT_VERSION ?= v1.60.1
+GOLANGCI_LINT_VERSION ?= v1.60.2
 # golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64.
 # windows isn't included here because of the path separator being different.
 ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml
index fc0f9c6543..f4a7385bb5 100644
--- a/scripts/golangci-lint.yml
+++ b/scripts/golangci-lint.yml
@@ -36,4 +36,4 @@ jobs:
         uses: golangci/golangci-lint-action@aaa42aa0628b4ae2578232a66b541047968fac86 # v6.1.0
         with:
           args: --verbose
-          version: v1.60.1
+          version: v1.60.2
diff --git a/template/template.go b/template/template.go
index 9ffed6ff61..c507dbe746 100644
--- a/template/template.go
+++ b/template/template.go
@@ -166,7 +166,7 @@ func NewTemplateExpander(
 				return html_template.HTML(text)
 			},
 			"match":     regexp.MatchString,
-			"title":     strings.Title,
+			"title":     strings.Title, //nolint:staticcheck
 			"toUpper":   strings.ToUpper,
 			"toLower":   strings.ToLower,
 			"graphLink": strutil.GraphLinkForExpression,

From 0f760f63dd5137ad406e0bc9f08676898e04ac97 Mon Sep 17 00:00:00 2001
From: beorn7 <beorn@grafana.com>
Date: Thu, 22 Aug 2024 13:59:36 +0200
Subject: [PATCH 46/51] lint: Revamp our linting rules, mostly around doc
 comments

Several things done here:

- Set `max-issues-per-linter` to 0 so that we actually see all linter
  warnings and not just 50 per linter. (As we also set
  `max-same-issues` to 0, I assume this was the intention from the
  beginning.)

- Stop using the golangci-lint default excludes (by setting
  `exclude-use-default: false`). Those are too generous and don't match
  our style conventions. (I have re-added some of the excludes
  explicitly in this commit. See below.)

- Re-add the `errcheck` exclusion we have used so far via the
  defaults.

- Exclude the signature requirement `govet` has for `Seek` methods
  because we use non-standard `Seek` methods a lot. (But we keep other
  requirements, while the default excludes completely disabled the
  check for common method signatures.)

- Exclude warnings about missing doc comments on exported symbols. (We
  used to be pretty adamant about doc comments, but stopped that at
  some point in the past. By now, we have about 500 missing doc
  comments. We may consider reintroducing this check, but that's
  outside of the scope of this commit. The default excludes of
  golangci-lint essentially ignore doc comments completely.)

- By no longer using the default excludes, we now get warnings back on
  malformed doc comments. That's the most impactful change in this
  commit. It does not enforce doc comments (again), but _if_ there is
  a doc comment, it has to have the recommended form. (Most of the
  changes in this commit are fixing this form.)

- Improve wording/spelling of some comments in .golangci.yml, and
  remove an outdated comment.

- Leave `package-comments` inactive, but add a TODO asking if we
  should change that.

- Add a new sub-linter `comment-spacings` (and fix corresponding
  comments), which avoids missing spaces after the leading `//`.

Signed-off-by: beorn7 <beorn@grafana.com>
---
 .golangci.yml                        | 30 +++++++++++++++++++++++-----
 cmd/promtool/main.go                 |  2 +-
 cmd/promtool/metrics.go              |  2 +-
 discovery/discoverer_metrics_noop.go |  2 +-
 discovery/discovery.go               | 22 ++++++++++----------
 discovery/manager.go                 |  2 +-
 discovery/metrics_refresh.go         |  2 +-
 discovery/util.go                    |  6 +++---
 model/exemplar/exemplar.go           |  6 ++++--
 model/labels/labels.go               | 11 +++++-----
 model/textparse/interface.go         |  4 ++--
 promql/engine.go                     |  6 +++---
 promql/parser/ast.go                 |  5 ++---
 scrape/clientprotobuf.go             |  4 ++--
 storage/interface.go                 |  2 +-
 storage/remote/azuread/azuread.go    |  6 ++++--
 template/template.go                 |  2 +-
 tsdb/chunks/head_chunks.go           |  4 ++--
 tsdb/db.go                           |  2 +-
 tsdb/encoding/encoding.go            |  4 ++--
 tsdb/head_other.go                   |  2 +-
 tsdb/index/index.go                  |  5 +++--
 tsdb/ooo_head_read.go                |  4 ++--
 tsdb/wlog/watcher.go                 | 11 +++++-----
 tsdb/wlog/wlog.go                    |  4 ++--
 util/annotations/annotations.go      |  2 +-
 util/testutil/cmp.go                 |  5 +++--
 27 files changed, 93 insertions(+), 64 deletions(-)

diff --git a/.golangci.yml b/.golangci.yml
index e924fe3d5b..303cd33d8b 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -25,15 +25,34 @@ linters:
     - loggercheck
 
 issues:
+  max-issues-per-linter: 0
   max-same-issues: 0
+  # The default exclusions are too aggressive. For one, they
+  # essentially disable any linting on doc comments. We disable
+  # default exclusions here and add exclusions fitting our codebase
+  # further down.
+  exclude-use-default: false
   exclude-files:
     # Skip autogenerated files.
     - ^.*\.(pb|y)\.go$
   exclude-dirs:
-    # Copied it from a different source
+    # Copied it from a different source.
     - storage/remote/otlptranslator/prometheusremotewrite
     - storage/remote/otlptranslator/prometheus
   exclude-rules:
+    - linters:
+        - errcheck
+      # Taken from the default exclusions (that are otherwise disabled above).
+      text: Error return value of .((os\.)?std(out|err)\..*|.*Close|.*Flush|os\.Remove(All)?|.*print(f|ln)?|os\.(Un)?Setenv). is not checked
+    - linters:
+        - govet
+      # We use many Seek methods that do not follow the usual pattern.
+      text: "stdmethods: method Seek.* should have signature Seek"
+    - linters:
+        - revive
+      # At some point, we stopped writing doc comments on exported symbols.
+      # TODO(beorn7): Maybe we should enforce this again? There are ~500 offenders right now.
+      text: exported (.+) should have comment( \(or a comment on this block\))? or be unexported
     - linters:
         - gocritic
       text: "appendAssign"
@@ -94,15 +113,14 @@ linters-settings:
     errorf: false
   revive:
     # By default, revive will enable only the linting rules that are named in the configuration file.
-    # So, it's needed to explicitly set in configuration all required rules.
-    # The following configuration enables all the rules from the defaults.toml
-    # https://github.com/mgechev/revive/blob/master/defaults.toml
+    # So, it's needed to explicitly enable all required rules here.
     rules:
       # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md
       - name: blank-imports
+      - name: comment-spacings
       - name: context-as-argument
         arguments:
-          # allow functions with test or bench signatures
+          # Allow functions with test or bench signatures.
           - allowTypesBefore: "*testing.T,testing.TB"
       - name: context-keys-type
       - name: dot-imports
@@ -118,6 +136,8 @@ linters-settings:
       - name: increment-decrement
       - name: indent-error-flow
       - name: package-comments
+        # TODO(beorn7): Currently, we have a lot of missing package doc comments. Maybe we should have them.
+        disabled: true
       - name: range
       - name: receiver-naming
       - name: redefines-builtin-id
diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go
index 1c8e1dd1c8..4d033c3c09 100644
--- a/cmd/promtool/main.go
+++ b/cmd/promtool/main.go
@@ -471,7 +471,7 @@ func (ls lintConfig) lintDuplicateRules() bool {
 	return ls.all || ls.duplicateRules
 }
 
-// Check server status - healthy & ready.
+// CheckServerStatus - healthy & ready.
 func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper http.RoundTripper) error {
 	if serverURL.Scheme == "" {
 		serverURL.Scheme = "http"
diff --git a/cmd/promtool/metrics.go b/cmd/promtool/metrics.go
index 6d162f459a..4c91d1d6fe 100644
--- a/cmd/promtool/metrics.go
+++ b/cmd/promtool/metrics.go
@@ -31,7 +31,7 @@ import (
 	"github.com/prometheus/prometheus/util/fmtutil"
 )
 
-// Push metrics to a prometheus remote write (for testing purpose only).
+// PushMetrics to a prometheus remote write (for testing purpose only).
 func PushMetrics(url *url.URL, roundTripper http.RoundTripper, headers map[string]string, timeout time.Duration, labels map[string]string, files ...string) int {
 	addressURL, err := url.Parse(url.String())
 	if err != nil {
diff --git a/discovery/discoverer_metrics_noop.go b/discovery/discoverer_metrics_noop.go
index 638317ace1..4321204b6c 100644
--- a/discovery/discoverer_metrics_noop.go
+++ b/discovery/discoverer_metrics_noop.go
@@ -13,7 +13,7 @@
 
 package discovery
 
-// Create a dummy metrics struct, because this SD doesn't have any metrics.
+// NoopDiscovererMetrics is a dummy metrics struct, because this SD doesn't have any metrics.
 type NoopDiscovererMetrics struct{}
 
 var _ DiscovererMetrics = (*NoopDiscovererMetrics)(nil)
diff --git a/discovery/discovery.go b/discovery/discovery.go
index a5826f8176..a91faf6c86 100644
--- a/discovery/discovery.go
+++ b/discovery/discovery.go
@@ -39,7 +39,7 @@ type Discoverer interface {
 	Run(ctx context.Context, up chan<- []*targetgroup.Group)
 }
 
-// Internal metrics of service discovery mechanisms.
+// DiscovererMetrics are internal metrics of service discovery mechanisms.
 type DiscovererMetrics interface {
 	Register() error
 	Unregister()
@@ -56,7 +56,7 @@ type DiscovererOptions struct {
 	HTTPClientOptions []config.HTTPClientOption
 }
 
-// Metrics used by the "refresh" package.
+// RefreshMetrics are used by the "refresh" package.
 // We define them here in the "discovery" package in order to avoid a cyclic dependency between
 // "discovery" and "refresh".
 type RefreshMetrics struct {
@@ -64,17 +64,18 @@ type RefreshMetrics struct {
 	Duration prometheus.Observer
 }
 
-// Instantiate the metrics used by the "refresh" package.
+// RefreshMetricsInstantiator instantiates the metrics used by the "refresh" package.
 type RefreshMetricsInstantiator interface {
 	Instantiate(mech string) *RefreshMetrics
 }
 
-// An interface for registering, unregistering, and instantiating metrics for the "refresh" package.
-// Refresh metrics are registered and unregistered outside of the service discovery mechanism.
-// This is so that the same metrics can be reused across different service discovery mechanisms.
-// To manage refresh metrics inside the SD mechanism, we'd need to use const labels which are
-// specific to that SD. However, doing so would also expose too many unused metrics on
-// the Prometheus /metrics endpoint.
+// RefreshMetricsManager is an interface for registering, unregistering, and
+// instantiating metrics for the "refresh" package. Refresh metrics are
+// registered and unregistered outside of the service discovery mechanism. This
+// is so that the same metrics can be reused across different service discovery
+// mechanisms. To manage refresh metrics inside the SD mechanism, we'd need to
+// use const labels which are specific to that SD. However, doing so would also
+// expose too many unused metrics on the Prometheus /metrics endpoint.
 type RefreshMetricsManager interface {
 	DiscovererMetrics
 	RefreshMetricsInstantiator
@@ -145,7 +146,8 @@ func (c StaticConfig) NewDiscoverer(DiscovererOptions) (Discoverer, error) {
 	return staticDiscoverer(c), nil
 }
 
-// No metrics are needed for this service discovery mechanism.
+// NewDiscovererMetrics returns NoopDiscovererMetrics because no metrics are
+// needed for this service discovery mechanism.
 func (c StaticConfig) NewDiscovererMetrics(prometheus.Registerer, RefreshMetricsInstantiator) DiscovererMetrics {
 	return &NoopDiscovererMetrics{}
 }
diff --git a/discovery/manager.go b/discovery/manager.go
index 897d7d151c..48bea85bb7 100644
--- a/discovery/manager.go
+++ b/discovery/manager.go
@@ -64,7 +64,7 @@ func (p *Provider) Config() interface{} {
 	return p.config
 }
 
-// Registers the metrics needed for SD mechanisms.
+// CreateAndRegisterSDMetrics registers the metrics needed for SD mechanisms.
 // Does not register the metrics for the Discovery Manager.
 // TODO(ptodev): Add ability to unregister the metrics?
 func CreateAndRegisterSDMetrics(reg prometheus.Registerer) (map[string]DiscovererMetrics, error) {
diff --git a/discovery/metrics_refresh.go b/discovery/metrics_refresh.go
index d621165ced..ef49e591a3 100644
--- a/discovery/metrics_refresh.go
+++ b/discovery/metrics_refresh.go
@@ -17,7 +17,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 )
 
-// Metric vectors for the "refresh" package.
+// RefreshMetricsVecs are metric vectors for the "refresh" package.
 // We define them here in the "discovery" package in order to avoid a cyclic dependency between
 // "discovery" and "refresh".
 type RefreshMetricsVecs struct {
diff --git a/discovery/util.go b/discovery/util.go
index 83cc640dd9..4e2a088518 100644
--- a/discovery/util.go
+++ b/discovery/util.go
@@ -19,8 +19,8 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 )
 
-// A utility to be used by implementations of discovery.Discoverer
-// which need to manage the lifetime of their metrics.
+// MetricRegisterer is used by implementations of discovery.Discoverer that need
+// to manage the lifetime of their metrics.
 type MetricRegisterer interface {
 	RegisterMetrics() error
 	UnregisterMetrics()
@@ -34,7 +34,7 @@ type metricRegistererImpl struct {
 
 var _ MetricRegisterer = &metricRegistererImpl{}
 
-// Creates an instance of a MetricRegisterer.
+// NewMetricRegisterer creates an instance of a MetricRegisterer.
 // Typically called inside the implementation of the NewDiscoverer() method.
 func NewMetricRegisterer(reg prometheus.Registerer, metrics []prometheus.Collector) MetricRegisterer {
 	return &metricRegistererImpl{
diff --git a/model/exemplar/exemplar.go b/model/exemplar/exemplar.go
index 08f55374ef..2c28b17257 100644
--- a/model/exemplar/exemplar.go
+++ b/model/exemplar/exemplar.go
@@ -15,7 +15,9 @@ package exemplar
 
 import "github.com/prometheus/prometheus/model/labels"
 
-// The combined length of the label names and values of an Exemplar's LabelSet MUST NOT exceed 128 UTF-8 characters
+// ExemplarMaxLabelSetLength is defined by OpenMetrics: "The combined length of
+// the label names and values of an Exemplar's LabelSet MUST NOT exceed 128
+// UTF-8 characters."
 // https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#exemplars
 const ExemplarMaxLabelSetLength = 128
 
@@ -49,7 +51,7 @@ func (e Exemplar) Equals(e2 Exemplar) bool {
 	return e.Value == e2.Value
 }
 
-// Sort first by timestamp, then value, then labels.
+// Compare first timestamps, then values, then labels.
 func Compare(a, b Exemplar) int {
 	if a.Ts < b.Ts {
 		return -1
diff --git a/model/labels/labels.go b/model/labels/labels.go
index cd30f4f8ff..f4de7496ce 100644
--- a/model/labels/labels.go
+++ b/model/labels/labels.go
@@ -315,7 +315,8 @@ func Compare(a, b Labels) int {
 	return len(a) - len(b)
 }
 
-// Copy labels from b on top of whatever was in ls previously, reusing memory or expanding if needed.
+// CopyFrom copies labels from b on top of whatever was in ls previously,
+// reusing memory or expanding if needed.
 func (ls *Labels) CopyFrom(b Labels) {
 	(*ls) = append((*ls)[:0], b...)
 }
@@ -422,7 +423,7 @@ type ScratchBuilder struct {
 	add Labels
 }
 
-// Symbol-table is no-op, just for api parity with dedupelabels.
+// SymbolTable is no-op, just for api parity with dedupelabels.
 type SymbolTable struct{}
 
 func NewSymbolTable() *SymbolTable { return nil }
@@ -458,7 +459,7 @@ func (b *ScratchBuilder) Add(name, value string) {
 	b.add = append(b.add, Label{Name: name, Value: value})
 }
 
-// Add a name/value pair, using []byte instead of string.
+// UnsafeAddBytes adds a name/value pair, using []byte instead of string.
 // The '-tags stringlabels' version of this function is unsafe, hence the name.
 // This version is safe - it copies the strings immediately - but we keep the same name so everything compiles.
 func (b *ScratchBuilder) UnsafeAddBytes(name, value []byte) {
@@ -475,14 +476,14 @@ func (b *ScratchBuilder) Assign(ls Labels) {
 	b.add = append(b.add[:0], ls...) // Copy on top of our slice, so we don't retain the input slice.
 }
 
-// Return the name/value pairs added so far as a Labels object.
+// Labels returns the name/value pairs added so far as a Labels object.
 // Note: if you want them sorted, call Sort() first.
 func (b *ScratchBuilder) Labels() Labels {
 	// Copy the slice, so the next use of ScratchBuilder doesn't overwrite.
 	return append([]Label{}, b.add...)
 }
 
-// Write the newly-built Labels out to ls.
+// Overwrite writes the newly-built Labels out to ls.
 // Callers must ensure that there are no other references to ls, or any strings fetched from it.
 func (b *ScratchBuilder) Overwrite(ls *Labels) {
 	*ls = append((*ls)[:0], b.add...)
diff --git a/model/textparse/interface.go b/model/textparse/interface.go
index df01dbc34f..0b5d9281e4 100644
--- a/model/textparse/interface.go
+++ b/model/textparse/interface.go
@@ -106,8 +106,8 @@ const (
 	EntryInvalid   Entry = -1
 	EntryType      Entry = 0
 	EntryHelp      Entry = 1
-	EntrySeries    Entry = 2 // A series with a simple float64 as value.
+	EntrySeries    Entry = 2 // EntrySeries marks a series with a simple float64 as value.
 	EntryComment   Entry = 3
 	EntryUnit      Entry = 4
-	EntryHistogram Entry = 5 // A series with a native histogram as a value.
+	EntryHistogram Entry = 5 // EntryHistogram marks a series with a native histogram as a value.
 )
diff --git a/promql/engine.go b/promql/engine.go
index daa46ab6f2..60c8e06394 100644
--- a/promql/engine.go
+++ b/promql/engine.go
@@ -573,7 +573,7 @@ func (ng *Engine) validateOpts(expr parser.Expr) error {
 	return validationErr
 }
 
-// NewTestQuery: inject special behaviour into Query for testing.
+// NewTestQuery injects special behaviour into Query for testing.
 func (ng *Engine) NewTestQuery(f func(context.Context) error) Query {
 	qry := &query{
 		q:           "test statement",
@@ -3531,14 +3531,14 @@ func makeInt64Pointer(val int64) *int64 {
 	return valp
 }
 
-// Add RatioSampler interface to allow unit-testing (previously: Randomizer).
+// RatioSampler allows unit-testing (previously: Randomizer).
 type RatioSampler interface {
 	// Return this sample "offset" between [0.0, 1.0]
 	sampleOffset(ts int64, sample *Sample) float64
 	AddRatioSample(r float64, sample *Sample) bool
 }
 
-// Use Hash(labels.String()) / maxUint64 as a "deterministic"
+// HashRatioSampler uses Hash(labels.String()) / maxUint64 as a "deterministic"
 // value in [0.0, 1.0].
 type HashRatioSampler struct{}
 
diff --git a/promql/parser/ast.go b/promql/parser/ast.go
index 830e8a2c5e..162d7817ab 100644
--- a/promql/parser/ast.go
+++ b/promql/parser/ast.go
@@ -352,8 +352,7 @@ func (f inspector) Visit(node Node, path []Node) (Visitor, error) {
 // f(node, path); node must not be nil. If f returns a nil error, Inspect invokes f
 // for all the non-nil children of node, recursively.
 func Inspect(node Node, f inspector) {
-	//nolint: errcheck
-	Walk(f, node, nil)
+	Walk(f, node, nil) //nolint:errcheck
 }
 
 // Children returns a list of all child nodes of a syntax tree node.
@@ -419,7 +418,7 @@ func mergeRanges(first, last Node) posrange.PositionRange {
 	}
 }
 
-// Item implements the Node interface.
+// PositionRange implements the Node interface.
 // This makes it possible to call mergeRanges on them.
 func (i *Item) PositionRange() posrange.PositionRange {
 	return posrange.PositionRange{
diff --git a/scrape/clientprotobuf.go b/scrape/clientprotobuf.go
index 2213268d59..e632035b40 100644
--- a/scrape/clientprotobuf.go
+++ b/scrape/clientprotobuf.go
@@ -23,7 +23,7 @@ import (
 	dto "github.com/prometheus/client_model/go"
 )
 
-// Write a MetricFamily into a protobuf.
+// MetricFamilyToProtobuf writes a MetricFamily into a protobuf.
 // This function is intended for testing scraping by providing protobuf serialized input.
 func MetricFamilyToProtobuf(metricFamily *dto.MetricFamily) ([]byte, error) {
 	buffer := &bytes.Buffer{}
@@ -34,7 +34,7 @@ func MetricFamilyToProtobuf(metricFamily *dto.MetricFamily) ([]byte, error) {
 	return buffer.Bytes(), nil
 }
 
-// Append a MetricFamily protobuf representation to a buffer.
+// AddMetricFamilyToProtobuf appends a MetricFamily protobuf representation to a buffer.
 // This function is intended for testing scraping by providing protobuf serialized input.
 func AddMetricFamilyToProtobuf(buffer *bytes.Buffer, metricFamily *dto.MetricFamily) error {
 	protoBuf, err := proto.Marshal(metricFamily)
diff --git a/storage/interface.go b/storage/interface.go
index f85f985e9d..2f125e5902 100644
--- a/storage/interface.go
+++ b/storage/interface.go
@@ -227,9 +227,9 @@ type LabelHints struct {
 	Limit int
 }
 
-// TODO(bwplotka): Move to promql/engine_test.go?
 // QueryableFunc is an adapter to allow the use of ordinary functions as
 // Queryables. It follows the idea of http.HandlerFunc.
+// TODO(bwplotka): Move to promql/engine_test.go?
 type QueryableFunc func(mint, maxt int64) (Querier, error)
 
 // Querier calls f() with the given parameters.
diff --git a/storage/remote/azuread/azuread.go b/storage/remote/azuread/azuread.go
index 58520c6a5d..82f46b82f6 100644
--- a/storage/remote/azuread/azuread.go
+++ b/storage/remote/azuread/azuread.go
@@ -31,13 +31,15 @@ import (
 	"github.com/google/uuid"
 )
 
+// Clouds.
 const (
-	// Clouds.
 	AzureChina      = "AzureChina"
 	AzureGovernment = "AzureGovernment"
 	AzurePublic     = "AzurePublic"
+)
 
-	// Audiences.
+// Audiences.
+const (
 	IngestionChinaAudience      = "https://monitor.azure.cn//.default"
 	IngestionGovernmentAudience = "https://monitor.azure.us//.default"
 	IngestionPublicAudience     = "https://monitor.azure.com//.default"
diff --git a/template/template.go b/template/template.go
index c507dbe746..0698c6c8ac 100644
--- a/template/template.go
+++ b/template/template.go
@@ -166,7 +166,7 @@ func NewTemplateExpander(
 				return html_template.HTML(text)
 			},
 			"match":     regexp.MatchString,
-			"title":     strings.Title, //nolint:staticcheck
+			"title":     strings.Title, //nolint:staticcheck // TODO(beorn7): Need to come up with a replacement using the cases package.
 			"toUpper":   strings.ToUpper,
 			"toLower":   strings.ToLower,
 			"graphLink": strutil.GraphLinkForExpression,
diff --git a/tsdb/chunks/head_chunks.go b/tsdb/chunks/head_chunks.go
index 6c8707c57b..876b42cb26 100644
--- a/tsdb/chunks/head_chunks.go
+++ b/tsdb/chunks/head_chunks.go
@@ -191,7 +191,7 @@ func (f *chunkPos) bytesToWriteForChunk(chkLen uint64) uint64 {
 // ChunkDiskMapper is for writing the Head block chunks to disk
 // and access chunks via mmapped files.
 type ChunkDiskMapper struct {
-	/// Writer.
+	// Writer.
 	dir             *os.File
 	writeBufferSize int
 
@@ -210,7 +210,7 @@ type ChunkDiskMapper struct {
 	crc32        hash.Hash
 	writePathMtx sync.Mutex
 
-	/// Reader.
+	// Reader.
 	// The int key in the map is the file number on the disk.
 	mmappedChunkFiles map[int]*mmappedChunkFile // Contains the m-mapped files for each chunk file mapped with its index.
 	closers           map[int]io.Closer         // Closers for resources behind the byte slices.
diff --git a/tsdb/db.go b/tsdb/db.go
index 706e5bbac1..a5b3a5e602 100644
--- a/tsdb/db.go
+++ b/tsdb/db.go
@@ -49,7 +49,7 @@ import (
 )
 
 const (
-	// Default duration of a block in milliseconds.
+	// DefaultBlockDuration in milliseconds.
 	DefaultBlockDuration = int64(2 * time.Hour / time.Millisecond)
 
 	// Block dir suffixes to make deletion and creation operations atomic.
diff --git a/tsdb/encoding/encoding.go b/tsdb/encoding/encoding.go
index cd98fbd82f..88fdd30c85 100644
--- a/tsdb/encoding/encoding.go
+++ b/tsdb/encoding/encoding.go
@@ -201,8 +201,8 @@ func (d *Decbuf) UvarintStr() string {
 	return string(d.UvarintBytes())
 }
 
-// The return value becomes invalid if the byte slice goes away.
-// Compared to UvarintStr, this avoid allocations.
+// UvarintBytes returns a value that becomes invalid if the byte slice goes away.
+// Compared to UvarintStr, it avoids allocations.
 func (d *Decbuf) UvarintBytes() []byte {
 	l := d.Uvarint64()
 	if d.E != nil {
diff --git a/tsdb/head_other.go b/tsdb/head_other.go
index eb1b93a3e5..fea91530dc 100644
--- a/tsdb/head_other.go
+++ b/tsdb/head_other.go
@@ -26,7 +26,7 @@ func (s *memSeries) labels() labels.Labels {
 	return s.lset
 }
 
-// No-op when not using dedupelabels.
+// RebuildSymbolTable is a no-op when not using dedupelabels.
 func (h *Head) RebuildSymbolTable(logger log.Logger) *labels.SymbolTable {
 	return nil
 }
diff --git a/tsdb/index/index.go b/tsdb/index/index.go
index 3621054598..0e0e353719 100644
--- a/tsdb/index/index.go
+++ b/tsdb/index/index.go
@@ -196,8 +196,9 @@ func NewTOCFromByteSlice(bs ByteSlice) (*TOC, error) {
 	return toc, d.Err()
 }
 
-// NewWriter returns a new Writer to the given filename. It serializes data in format version 2.
-// It uses the given encoder to encode each postings list.
+// NewWriterWithEncoder returns a new Writer to the given filename. It
+// serializes data in format version 2. It uses the given encoder to encode each
+// postings list.
 func NewWriterWithEncoder(ctx context.Context, fn string, encoder PostingsEncoder) (*Writer, error) {
 	dir := filepath.Dir(fn)
 
diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index cc98df4729..f1d06a421b 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -263,8 +263,8 @@ func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chu
 	return nil, mc, err
 }
 
-// ChunkOrIterableWithCopy: implements ChunkReaderWithCopy. The special Copy behaviour
-// is only implemented for the in-order head chunk.
+// ChunkOrIterableWithCopy implements ChunkReaderWithCopy. The special Copy
+// behaviour is only implemented for the in-order head chunk.
 func (cr *HeadAndOOOChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) {
 	_, _, isOOO := unpackHeadChunkRef(meta.Ref)
 	if !isOOO {
diff --git a/tsdb/wlog/watcher.go b/tsdb/wlog/watcher.go
index c1eed78f6d..86e6f9c81d 100644
--- a/tsdb/wlog/watcher.go
+++ b/tsdb/wlog/watcher.go
@@ -58,15 +58,16 @@ type WriteTo interface {
 	StoreSeries([]record.RefSeries, int)
 	StoreMetadata([]record.RefMetadata)
 
-	// Next two methods are intended for garbage-collection: first we call
-	// UpdateSeriesSegment on all current series
+	// UpdateSeriesSegment and SeriesReset are intended for
+	// garbage-collection:
+	// First we call UpdateSeriesSegment on all current series.
 	UpdateSeriesSegment([]record.RefSeries, int)
-	// Then SeriesReset is called to allow the deletion
-	// of all series created in a segment lower than the argument.
+	// Then SeriesReset is called to allow the deletion of all series
+	// created in a segment lower than the argument.
 	SeriesReset(int)
 }
 
-// Used to notify the watcher that data has been written so that it can read.
+// WriteNotified notifies the watcher that data has been written so that it can read.
 type WriteNotified interface {
 	Notify()
 }
diff --git a/tsdb/wlog/wlog.go b/tsdb/wlog/wlog.go
index 993e930cef..b14521f358 100644
--- a/tsdb/wlog/wlog.go
+++ b/tsdb/wlog/wlog.go
@@ -38,8 +38,8 @@ import (
 )
 
 const (
-	DefaultSegmentSize = 128 * 1024 * 1024 // 128 MB
-	pageSize           = 32 * 1024         // 32KB
+	DefaultSegmentSize = 128 * 1024 * 1024 // DefaultSegmentSize is 128 MB.
+	pageSize           = 32 * 1024         // pageSize is 32KB.
 	recordHeaderSize   = 7
 	WblDirName         = "wbl"
 )
diff --git a/util/annotations/annotations.go b/util/annotations/annotations.go
index bc5d76db43..b0272b7fee 100644
--- a/util/annotations/annotations.go
+++ b/util/annotations/annotations.go
@@ -174,7 +174,7 @@ func NewInvalidQuantileWarning(q float64, pos posrange.PositionRange) error {
 	}
 }
 
-// NewInvalidQuantileWarning is used when the user specifies an invalid ratio
+// NewInvalidRatioWarning is used when the user specifies an invalid ratio
 // value, i.e. a float that is outside the range [-1, 1] or NaN.
 func NewInvalidRatioWarning(q, to float64, pos posrange.PositionRange) error {
 	return annoErr{
diff --git a/util/testutil/cmp.go b/util/testutil/cmp.go
index 370d191f3f..24d39d514c 100644
--- a/util/testutil/cmp.go
+++ b/util/testutil/cmp.go
@@ -23,13 +23,14 @@ import (
 	"github.com/prometheus/prometheus/model/labels"
 )
 
-// Replacement for require.Equal using go-cmp adapted for Prometheus data structures, instead of DeepEqual.
+// RequireEqual is a replacement for require.Equal using go-cmp adapted for
+// Prometheus data structures, instead of DeepEqual.
 func RequireEqual(t testing.TB, expected, actual interface{}, msgAndArgs ...interface{}) {
 	t.Helper()
 	RequireEqualWithOptions(t, expected, actual, nil, msgAndArgs...)
 }
 
-// As RequireEqual but allows extra cmp.Options.
+// RequireEqualWithOptions works like RequireEqual but allows extra cmp.Options.
 func RequireEqualWithOptions(t testing.TB, expected, actual interface{}, extra []cmp.Option, msgAndArgs ...interface{}) {
 	t.Helper()
 	options := append([]cmp.Option{cmp.Comparer(labels.Equal)}, extra...)

From b0aba26ed5f2abf280bef3360672953ecb8d185b Mon Sep 17 00:00:00 2001
From: Arve Knudsen <arve.knudsen@gmail.com>
Date: Fri, 23 Aug 2024 08:20:20 +0200
Subject: [PATCH 47/51] tsdb: Fix ValNone typo in comment

Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
---
 tsdb/querier.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tsdb/querier.go b/tsdb/querier.go
index 2e15f0b084..912c950329 100644
--- a/tsdb/querier.go
+++ b/tsdb/querier.go
@@ -972,7 +972,7 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool {
 		// Check if the encoding has changed (i.e. we need to create a new
 		// chunk as chunks can't have multiple encoding types).
 		// For the first sample, the following condition will always be true as
-		// ValNoneNone != ValFloat | ValHistogram | ValFloatHistogram.
+		// ValNone != ValFloat | ValHistogram | ValFloatHistogram.
 		if currentValueType != prevValueType {
 			if prevValueType != chunkenc.ValNone {
 				p.chunksFromIterable = append(p.chunksFromIterable, chunks.Meta{Chunk: currentChunk, MinTime: cmint, MaxTime: cmaxt})

From 41c076196ef6626585cac34ec758e6582019c010 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= <gyorgy.krajcsovits@grafana.com>
Date: Thu, 22 Aug 2024 11:36:47 +0200
Subject: [PATCH 48/51] New cases in Test_ChunkQuerier_OOOQuery and
 Test_Querier_OOOQuery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Case 1: OOO in-memory head chunk overlaps with first mmaped in-order chunk.

Query: |----------------------------------------------------------------|
InO:    |------mmap---------------||---------mem----------------------|
OOO:     |-----mem-----------|

This triggers the bug where ChunkOrIterableWithCopy does not include OOO head chunks.

Similar to #14693 however testing the end of the interval doesn't
trigger the problem because there the in-order head chunk will be
trimmed with a tombstone, causing the code to switch to ChunkOrIterable
which was fixed.
See https://github.com/prometheus/prometheus/blob/a36d1a8a9261ba7169cf2fca862a606adc9f3d9d/tsdb/querier.go#L646
where len(p.bufIter.Intervals) will be non zero, because it includes the
tombstone to trim the result to the query max time.

Thus a new test is added to check the overlap at the beginning of the
interval that has a separate chunk, which does not need trimming.

Note: same test doesn't fail for sample querier in Test_Querier_OOOQuery
as that doesn't use copy, that is copyHeadChunk is false in the if
condition above.

Case 2:

OOO mmaped head chunk overlaps with first mmaped in-order chunk.

Query: |----------------------------------------------------------------|
InO:    |------mmap---------------||---------mem----------------------|
OOO:     |-----mmap-----------|                             |--mem--|

In this case the meta contains the reference of the in-order chunk and
no indication that a merge is needed with the OOO mmaped chunk.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
---
 tsdb/db_test.go | 377 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 304 insertions(+), 73 deletions(-)

diff --git a/tsdb/db_test.go b/tsdb/db_test.go
index 6d266e8724..4e3a077f6a 100644
--- a/tsdb/db_test.go
+++ b/tsdb/db_test.go
@@ -5036,16 +5036,15 @@ func testOOOQueryAfterRestartWithSnapshotAndRemovedWBL(t *testing.T, scenario sa
 
 func Test_Querier_OOOQuery(t *testing.T) {
 	opts := DefaultOptions()
-	opts.OutOfOrderCapMax = 30
 	opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds()
 
 	series1 := labels.FromStrings("foo", "bar1")
 
+	type filterFunc func(t int64) bool
+	defaultFilterFunc := func(t int64) bool { return true }
+
 	minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() }
-	addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter func(int64) bool) ([]chunks.Sample, int) {
-		if filter == nil {
-			filter = func(int64) bool { return true }
-		}
+	addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc) ([]chunks.Sample, int) {
 		app := db.Appender(context.Background())
 		totalAppended := 0
 		for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() {
@@ -5060,68 +5059,173 @@ func Test_Querier_OOOQuery(t *testing.T) {
 			totalAppended++
 		}
 		require.NoError(t, app.Commit())
+		require.Positive(t, totalAppended, 0) // Sanity check that filter is not too zealous.
 		return expSamples, totalAppended
 	}
 
+	type sampleBatch struct {
+		minT   int64
+		maxT   int64
+		filter filterFunc
+		isOOO  bool
+	}
+
 	tests := []struct {
-		name        string
-		queryMinT   int64
-		queryMaxT   int64
-		inOrderMinT int64
-		inOrderMaxT int64
-		oooMinT     int64
-		oooMaxT     int64
+		name      string
+		oooCap    int64
+		queryMinT int64
+		queryMaxT int64
+		batches   []sampleBatch
 	}{
 		{
-			name:        "query interval covering ooomint and inordermaxt returns all ingested samples",
-			queryMinT:   minutes(0),
-			queryMaxT:   minutes(200),
-			inOrderMinT: minutes(100),
-			inOrderMaxT: minutes(200),
-			oooMinT:     minutes(0),
-			oooMaxT:     minutes(99),
+			name:      "query interval covering ooomint and inordermaxt returns all ingested samples",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: defaultFilterFunc,
+				},
+				{
+					minT:   minutes(0),
+					maxT:   minutes(99),
+					filter: defaultFilterFunc,
+					isOOO:  true,
+				},
+			},
 		},
 		{
-			name:        "partial query interval returns only samples within interval",
-			queryMinT:   minutes(20),
-			queryMaxT:   minutes(180),
-			inOrderMinT: minutes(100),
-			inOrderMaxT: minutes(200),
-			oooMinT:     minutes(0),
-			oooMaxT:     minutes(99),
+			name:      "partial query interval returns only samples within interval",
+			oooCap:    30,
+			queryMinT: minutes(20),
+			queryMaxT: minutes(180),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: defaultFilterFunc,
+				},
+				{
+					minT:   minutes(0),
+					maxT:   minutes(99),
+					filter: defaultFilterFunc,
+					isOOO:  true,
+				},
+			},
 		},
 		{
-			name:        "query overlapping inorder and ooo samples returns all ingested samples",
-			queryMinT:   minutes(0),
-			queryMaxT:   minutes(200),
-			inOrderMinT: minutes(100),
-			inOrderMaxT: minutes(200),
-			oooMinT:     minutes(180 - opts.OutOfOrderCapMax/2), // Make sure to fit into the OOO head.
-			oooMaxT:     minutes(180),
+			name:      "query overlapping inorder and ooo samples returns all ingested samples at the end of the interval",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(170),
+					maxT:   minutes(180),
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query overlapping inorder and ooo in-memory samples returns all ingested samples at the beginning of the interval",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(100),
+					maxT:   minutes(110),
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query inorder contain ooo mmaped samples returns all ingested samples at the beginning of the interval",
+			oooCap:    5,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(101),
+					maxT:   minutes(101 + (5-1)*2), // Append samples to fit in a single mmaped OOO chunk and fit inside the first in-order mmaped chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+				{
+					minT:   minutes(191),
+					maxT:   minutes(193), // Append some more OOO samples to trigger mapping the OOO chunk, but use minutes 191-193 to not overlap with in-order head chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query overlapping inorder and ooo mmaped samples returns all ingested samples at the beginning of the interval",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(101),
+					maxT:   minutes(101 + (30-1)*2), // Append samples to fit in a single mmaped OOO chunk and overlap the first in-order mmaped chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+				{
+					minT:   minutes(191),
+					maxT:   minutes(193), // Append some more OOO samples to trigger mapping the OOO chunk, but use minutes 191-193 to not overlap with in-order head chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
 		},
 	}
 	for _, tc := range tests {
 		t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) {
+			opts.OutOfOrderCapMax = tc.oooCap
 			db := openTestDB(t, opts, nil)
 			db.DisableCompactions()
 			defer func() {
 				require.NoError(t, db.Close())
 			}()
 
-			var (
-				expSamples []chunks.Sample
-				inoSamples int
-			)
+			var expSamples []chunks.Sample
+			var oooSamples, appendedCount int
 
-			// Add in-order samples (at even minutes).
-			expSamples, inoSamples = addSample(db, tc.inOrderMinT, tc.inOrderMaxT, tc.queryMinT, tc.queryMaxT, expSamples, func(t int64) bool { return t%2 == 0 })
-			// Sanity check that filter is not too zealous.
-			require.Positive(t, inoSamples, 0)
-
-			// Add out-of-order samples (at odd minutes).
-			expSamples, oooSamples := addSample(db, tc.oooMinT, tc.oooMaxT, tc.queryMinT, tc.queryMaxT, expSamples, func(t int64) bool { return t%2 == 1 })
-			// Sanity check that filter is not too zealous.
-			require.Positive(t, oooSamples, 0)
+			for _, batch := range tc.batches {
+				expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter)
+				if batch.isOOO {
+					oooSamples += appendedCount
+				}
+			}
 
 			sort.Slice(expSamples, func(i, j int) bool {
 				return expSamples[i].T() < expSamples[j].T()
@@ -5147,11 +5251,17 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 
 	series1 := labels.FromStrings("foo", "bar1")
 
+	type filterFunc func(t int64) bool
+	defaultFilterFunc := func(t int64) bool { return true }
+
 	minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() }
-	addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample) ([]chunks.Sample, int) {
+	addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc) ([]chunks.Sample, int) {
 		app := db.Appender(context.Background())
 		totalAppended := 0
 		for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() {
+			if !filter(m / time.Minute.Milliseconds()) {
+				continue
+			}
 			_, err := app.Append(0, series1, m, float64(m))
 			if m >= queryMinT && m <= queryMaxT {
 				expSamples = append(expSamples, sample{t: m, f: float64(m)})
@@ -5160,39 +5270,158 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 			totalAppended++
 		}
 		require.NoError(t, app.Commit())
+		require.Positive(t, totalAppended) // Sanity check that filter is not too zealous.
 		return expSamples, totalAppended
 	}
 
+	type sampleBatch struct {
+		minT   int64
+		maxT   int64
+		filter filterFunc
+		isOOO  bool
+	}
+
 	tests := []struct {
-		name        string
-		queryMinT   int64
-		queryMaxT   int64
-		inOrderMinT int64
-		inOrderMaxT int64
-		oooMinT     int64
-		oooMaxT     int64
+		name      string
+		oooCap    int64
+		queryMinT int64
+		queryMaxT int64
+		batches   []sampleBatch
 	}{
 		{
-			name:        "query interval covering ooomint and inordermaxt returns all ingested samples",
-			queryMinT:   minutes(0),
-			queryMaxT:   minutes(200),
-			inOrderMinT: minutes(100),
-			inOrderMaxT: minutes(200),
-			oooMinT:     minutes(0),
-			oooMaxT:     minutes(99),
+			name:      "query interval covering ooomint and inordermaxt returns all ingested samples",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: defaultFilterFunc,
+				},
+				{
+					minT:   minutes(0),
+					maxT:   minutes(99),
+					filter: defaultFilterFunc,
+					isOOO:  true,
+				},
+			},
 		},
 		{
-			name:        "partial query interval returns only samples within interval",
-			queryMinT:   minutes(20),
-			queryMaxT:   minutes(180),
-			inOrderMinT: minutes(100),
-			inOrderMaxT: minutes(200),
-			oooMinT:     minutes(0),
-			oooMaxT:     minutes(99),
+			name:      "partial query interval returns only samples within interval",
+			oooCap:    30,
+			queryMinT: minutes(20),
+			queryMaxT: minutes(180),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: defaultFilterFunc,
+				},
+				{
+					minT:   minutes(0),
+					maxT:   minutes(99),
+					filter: defaultFilterFunc,
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query overlapping inorder and ooo samples returns all ingested samples at the end of the interval",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(170),
+					maxT:   minutes(180),
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query overlapping inorder and ooo in-memory samples returns all ingested samples at the beginning of the interval",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(100),
+					maxT:   minutes(110),
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query inorder contain ooo mmaped samples returns all ingested samples at the beginning of the interval",
+			oooCap:    5,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(101),
+					maxT:   minutes(101 + (5-1)*2), // Append samples to fit in a single mmaped OOO chunk and fit inside the first in-order mmaped chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+				{
+					minT:   minutes(191),
+					maxT:   minutes(193), // Append some more OOO samples to trigger mapping the OOO chunk, but use minutes 191-193 to not overlap with in-order head chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query overlapping inorder and ooo mmaped samples returns all ingested samples at the beginning of the interval",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(101),
+					maxT:   minutes(101 + (30-1)*2), // Append samples to fit in a single mmaped OOO chunk and overlap the first in-order mmaped chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+				{
+					minT:   minutes(191),
+					maxT:   minutes(193), // Append some more OOO samples to trigger mapping the OOO chunk, but use minutes 191-193 to not overlap with in-order head chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
 		},
 	}
 	for _, tc := range tests {
 		t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) {
+			opts.OutOfOrderCapMax = tc.oooCap
 			db := openTestDB(t, opts, nil)
 			db.DisableCompactions()
 			defer func() {
@@ -5200,12 +5429,14 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 			}()
 
 			var expSamples []chunks.Sample
+			var oooSamples, appendedCount int
 
-			// Add in-order samples.
-			expSamples, _ = addSample(db, tc.inOrderMinT, tc.inOrderMaxT, tc.queryMinT, tc.queryMaxT, expSamples)
-
-			// Add out-of-order samples.
-			expSamples, oooSamples := addSample(db, tc.oooMinT, tc.oooMaxT, tc.queryMinT, tc.queryMaxT, expSamples)
+			for _, batch := range tc.batches {
+				expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter)
+				if batch.isOOO {
+					oooSamples += appendedCount
+				}
+			}
 
 			sort.Slice(expSamples, func(i, j int) bool {
 				return expSamples[i].T() < expSamples[j].T()

From 183bbc39a23123789baf77eb9f915103bb83fc83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= <gyorgy.krajcsovits@grafana.com>
Date: Thu, 22 Aug 2024 11:59:53 +0200
Subject: [PATCH 49/51] Make requesting merge with OOO head explicit in
 chunk.Meta
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
---
 tsdb/chunks/chunks.go      |  3 +++
 tsdb/ooo_head_read.go      | 17 +++++++++--------
 tsdb/ooo_head_read_test.go |  9 +++++----
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/tsdb/chunks/chunks.go b/tsdb/chunks/chunks.go
index ec0f6d4036..69201c6db7 100644
--- a/tsdb/chunks/chunks.go
+++ b/tsdb/chunks/chunks.go
@@ -133,6 +133,9 @@ type Meta struct {
 	// Time range the data covers.
 	// When MaxTime == math.MaxInt64 the chunk is still open and being appended to.
 	MinTime, MaxTime int64
+
+	// MergeOOO indicates that this meta needs to be merged with OOO data.
+	MergeOOO bool
 }
 
 // ChunkFromSamples requires all samples to have the same type.
diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index f1d06a421b..a50decd623 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -91,10 +91,11 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap
 
 	addChunk := func(minT, maxT int64, ref chunks.ChunkRef, chunk chunkenc.Chunk) {
 		tmpChks = append(tmpChks, chunks.Meta{
-			MinTime: minT,
-			MaxTime: maxT,
-			Ref:     ref,
-			Chunk:   chunk,
+			MinTime:  minT,
+			MaxTime:  maxT,
+			Ref:      ref,
+			Chunk:    chunk,
+			MergeOOO: true,
 		})
 	}
 
@@ -160,6 +161,7 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap
 			if c.Chunk != nil {
 				(*chks)[len(*chks)-1].Chunk = c.Chunk
 			}
+			(*chks)[len(*chks)-1].MergeOOO = (*chks)[len(*chks)-1].MergeOOO || c.MergeOOO
 		}
 	}
 
@@ -241,8 +243,8 @@ func NewHeadAndOOOChunkReader(head *Head, mint, maxt int64, cr *headChunkReader,
 }
 
 func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error) {
-	sid, _, isOOO := unpackHeadChunkRef(meta.Ref)
-	if !isOOO && meta.Chunk == nil { // meta.Chunk can have a copy of OOO head samples, even on non-OOO chunk ID.
+	sid, _, _ := unpackHeadChunkRef(meta.Ref)
+	if !meta.MergeOOO {
 		return cr.cr.ChunkOrIterable(meta)
 	}
 
@@ -266,8 +268,7 @@ func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chu
 // ChunkOrIterableWithCopy implements ChunkReaderWithCopy. The special Copy
 // behaviour is only implemented for the in-order head chunk.
 func (cr *HeadAndOOOChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) {
-	_, _, isOOO := unpackHeadChunkRef(meta.Ref)
-	if !isOOO {
+	if !meta.MergeOOO {
 		return cr.cr.ChunkOrIterableWithCopy(meta)
 	}
 	chk, iter, err := cr.ChunkOrIterable(meta)
diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go
index f71d497320..e565933f83 100644
--- a/tsdb/ooo_head_read_test.go
+++ b/tsdb/ooo_head_read_test.go
@@ -308,9 +308,10 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 					var expChunks []chunks.Meta
 					for _, e := range tc.expChunks {
 						meta := chunks.Meta{
-							Chunk:   chunkenc.Chunk(nil),
-							MinTime: e.mint,
-							MaxTime: e.maxt,
+							Chunk:    chunkenc.Chunk(nil),
+							MinTime:  e.mint,
+							MaxTime:  e.maxt,
+							MergeOOO: true, // Only OOO chunks are tested here, so we always request merge from OOO head.
 						}
 
 						// Ref to whatever Ref the chunk has, that we refer to by ID
@@ -484,7 +485,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) {
 		cr := NewHeadAndOOOChunkReader(db.head, 0, 1000, nil, nil, 0)
 		defer cr.Close()
 		c, iterable, err := cr.ChunkOrIterable(chunks.Meta{
-			Ref: 0x1800000, Chunk: chunkenc.Chunk(nil), MinTime: 100, MaxTime: 300,
+			Ref: 0x1800000, Chunk: chunkenc.Chunk(nil), MinTime: 100, MaxTime: 300, MergeOOO: true,
 		})
 		require.Nil(t, iterable)
 		require.Equal(t, err, fmt.Errorf("not found"))

From b05f8aea613dc442514603c7045ee47f8b185d55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= <gyorgy.krajcsovits@grafana.com>
Date: Fri, 23 Aug 2024 17:44:03 +0200
Subject: [PATCH 50/51] Follow up change from #14354
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Chunks are now grouped slightly differently, thus counter reset
detection no longer works.

The chunk from 40-43 is now processed independent of the remaining chunks.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
---
 tsdb/db_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tsdb/db_test.go b/tsdb/db_test.go
index cfba2c5eaf..a53f3b3db7 100644
--- a/tsdb/db_test.go
+++ b/tsdb/db_test.go
@@ -6105,7 +6105,7 @@ func testOOONativeHistogramsWithCounterResets(t *testing.T, scenario sampleTypeS
 					shouldReset: func(v int64) bool {
 						return v == 44
 					},
-					expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.CounterReset},
+					expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.UnknownCounterReset},
 				},
 				{
 					from:                 50,

From c70a07ca1de450e9ab14ce0f2c3b018ae8c9cd33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= <gyorgy.krajcsovits@grafana.com>
Date: Fri, 23 Aug 2024 17:52:10 +0200
Subject: [PATCH 51/51] Remove stray comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

lint fix

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
---
 tsdb/ooo_head_read.go | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go
index bd7b3098b3..7a4209b1fb 100644
--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@@ -100,11 +100,6 @@ func (c MultiChunk) Reset([]byte) {
 	// no-op
 }
 
-// lastGarbageCollectedMmapRef gives the last mmap chunk that may be being garbage collected and so
-// any chunk at or before this ref will not be considered. 0 disables this check.
-//
-// maxMmapRef tells upto what max m-map chunk that we can consider. If it is non-0, then
-// the oooHeadChunk will not be considered.
 func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error {
 	s := oh.head.series.getByID(chunks.HeadSeriesRef(ref))