From 106e20cde53c2724dfe948ad8c72d099b7cd9ff7 Mon Sep 17 00:00:00 2001
From: beorn7 <beorn@grafana.com>
Date: Thu, 7 Apr 2022 18:59:56 +0200
Subject: [PATCH 1/2] Histogram: Fix and simplify histogram_quantile

For conventional histograms, we need to gather all the individual
bucket timeseries at a data point to do the quantile calculation. The
code so far mirrored this behavior for the new native
histograms. However, since a single data point contains all the
buckets alreade, that's actually not needed. This PR simplifies the
code while still detecting a mix of conventional and native
histograms.

The weird signature calculation for the conventional histograms is
getting even weirder because of that. If this PR turns out to do the
right thing, I will implement a proper fix for the signature
calculation upstream.

Signed-off-by: beorn7 <beorn@grafana.com>
---
 promql/engine.go    |  2 -
 promql/functions.go | 95 ++++++++++++++++++---------------------------
 promql/quantile.go  |  5 ---
 3 files changed, 38 insertions(+), 64 deletions(-)

diff --git a/promql/engine.go b/promql/engine.go
index 0a086cca1b..ea8b073383 100644
--- a/promql/engine.go
+++ b/promql/engine.go
@@ -989,8 +989,6 @@ type EvalNodeHelper struct {
 	Dmn map[uint64]labels.Labels
 	// funcHistogramQuantile for conventional histograms.
 	signatureToMetricWithBuckets map[string]*metricWithBuckets
-	// funcHistogramQuantile for the new histograms.
-	signatureToMetricWithHistograms map[string]*metricWithHistograms
 	// label_replace.
 	regex *regexp.Regexp
 
diff --git a/promql/functions.go b/promql/functions.go
index 32f3ad9ef3..a03b445f13 100644
--- a/promql/functions.go
+++ b/promql/functions.go
@@ -869,7 +869,6 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 	q := vals[0].(Vector)[0].V
 	inVec := vals[1].(Vector)
 	sigf := signatureFunc(false, enh.lblBuf, labels.BucketLabel)
-	ignoreSignature := make(map[string]bool) // For signatures having both new and old histograms.
 
 	if enh.signatureToMetricWithBuckets == nil {
 		enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
@@ -878,45 +877,19 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 			v.buckets = v.buckets[:0]
 		}
 	}
-	if enh.signatureToMetricWithHistograms == nil {
-		enh.signatureToMetricWithHistograms = map[string]*metricWithHistograms{}
-	} else {
-		for _, v := range enh.signatureToMetricWithHistograms {
-			v.histogram = nil
-		}
-	}
-	for _, el := range inVec {
-		l := sigf(el.Metric)
-		if ignoreSignature[l] {
-			continue
-		}
 
-		if el.H != nil { // It's a histogram type.
-			_, ok := enh.signatureToMetricWithBuckets[l]
-			if ok {
-				// This signature exists for both conventional
-				// and new histograms, which is not supported.
-				// TODO(beorn7): Issue a warning somehow.
-				delete(enh.signatureToMetricWithBuckets, l)
-				delete(enh.signatureToMetricWithHistograms, l)
-				ignoreSignature[l] = true
-				continue
-			}
+	var histogramSamples []Sample
 
-			_, ok = enh.signatureToMetricWithHistograms[l]
-			if ok {
-				panic(errors.New("histogram_quantile: vector cannot contain metrics with the same labelset"))
-			}
-			el.Metric = labels.NewBuilder(el.Metric).
-				Del(labels.BucketLabel, labels.MetricName).
-				Labels()
-
-			enh.signatureToMetricWithHistograms[l] = &metricWithHistograms{el.Metric, el.H}
+	for _, sample := range inVec {
+		// We are only looking for conventional buckets here. Remember
+		// the histograms for later treatment.
+		if sample.H != nil {
+			histogramSamples = append(histogramSamples, sample)
 			continue
 		}
 
 		upperBound, err := strconv.ParseFloat(
-			el.Metric.Get(model.BucketLabel), 64,
+			sample.Metric.Get(model.BucketLabel), 64,
 		)
 		if err != nil {
 			// Oops, no bucket label or malformed label value. Skip.
@@ -924,30 +897,47 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 			continue
 		}
 
-		_, ok := enh.signatureToMetricWithHistograms[l]
-		if ok {
-			// This signature exists for both conventional and new
-			// histograms, which is not supported.
-			// TODO(beorn7): Issue a warning somehow.
-			delete(enh.signatureToMetricWithBuckets, l)
-			delete(enh.signatureToMetricWithHistograms, l)
-			ignoreSignature[l] = true
-			continue
-		}
+		l := sigf(sample.Metric)
 		// Add the metric name (which is always removed) to the signature to prevent combining multiple histograms
 		// with the same label set. See https://github.com/prometheus/prometheus/issues/9910
-		l = l + el.Metric.Get(model.MetricNameLabel)
+		// TODO(beorn7): What's the performance impact? We might need to implement this more efficiently.
+		l = l + sample.Metric.Get(model.MetricNameLabel)
 
 		mb, ok := enh.signatureToMetricWithBuckets[l]
 		if !ok {
-			el.Metric = labels.NewBuilder(el.Metric).
+			sample.Metric = labels.NewBuilder(sample.Metric).
 				Del(excludedLabels...).
 				Labels()
 
-			mb = &metricWithBuckets{el.Metric, nil}
+			mb = &metricWithBuckets{sample.Metric, nil}
 			enh.signatureToMetricWithBuckets[l] = mb
 		}
-		mb.buckets = append(mb.buckets, bucket{upperBound, el.V})
+		mb.buckets = append(mb.buckets, bucket{upperBound, sample.V})
+
+	}
+
+	// Now deal with the histograms.
+	for _, sample := range histogramSamples {
+		// We have to reconstruct the exact same signature as above for
+		// a conventional histogram, just ignoring any le label.
+		// TODO(beorn7): This replicates the inefficient implementation
+		// from above and has to be improved under the same
+		// circumstances.
+		l := string(sample.Metric.WithoutLabels().Bytes(enh.lblBuf))
+		l = l + sample.Metric.Get(model.MetricNameLabel)
+		if mb, ok := enh.signatureToMetricWithBuckets[l]; ok && len(mb.buckets) > 0 {
+			// At this data point, we have conventional histogram
+			// buckets and a native histogram with the same name and
+			// labels. Do not evaluate anything.
+			// TODO(beorn7): Issue a warning somehow.
+			delete(enh.signatureToMetricWithBuckets, l)
+			continue
+		}
+
+		enh.Out = append(enh.Out, Sample{
+			Metric: enh.DropMetricName(sample.Metric),
+			Point:  Point{V: histogramQuantile(q, sample.H)},
+		})
 	}
 
 	for _, mb := range enh.signatureToMetricWithBuckets {
@@ -959,15 +949,6 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 		}
 	}
 
-	for _, mh := range enh.signatureToMetricWithHistograms {
-		if mh.histogram != nil {
-			enh.Out = append(enh.Out, Sample{
-				Metric: mh.metric,
-				Point:  Point{V: histogramQuantile(q, mh.histogram)},
-			})
-		}
-	}
-
 	return enh.Out
 }
 
diff --git a/promql/quantile.go b/promql/quantile.go
index 4902bde923..dba8e1e712 100644
--- a/promql/quantile.go
+++ b/promql/quantile.go
@@ -47,11 +47,6 @@ type metricWithBuckets struct {
 	buckets buckets
 }
 
-type metricWithHistograms struct {
-	metric    labels.Labels
-	histogram *histogram.FloatHistogram
-}
-
 // bucketQuantile calculates the quantile 'q' based on the given buckets. The
 // buckets will be sorted by upperBound by this function (i.e. no sorting
 // needed before calling this function). The quantile value is interpolated

From 56db51c8267cd660e4f77ccf3821bb1968397e29 Mon Sep 17 00:00:00 2001
From: beorn7 <beorn@grafana.com>
Date: Tue, 12 Apr 2022 00:37:50 +0200
Subject: [PATCH 2/2] Histgram: Fix Compact for spans of only empty buckets

Signed-off-by: beorn7 <beorn@grafana.com>
---
 model/histogram/float_histogram.go      |  4 +++
 model/histogram/float_histogram_test.go | 34 ++++++++++++++++++++++---
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go
index 4eb3c0b2b8..475eca013b 100644
--- a/model/histogram/float_histogram.go
+++ b/model/histogram/float_histogram.go
@@ -430,7 +430,11 @@ func compactBuckets(buckets []float64, spans []Span, maxEmptyBuckets int) ([]flo
 				// Start of span.
 				if nEmpty == int(spans[iSpan].Length) {
 					// The whole span is empty.
+					offset := spans[iSpan].Offset
 					spans = append(spans[:iSpan], spans[iSpan+1:]...)
+					if len(spans) > iSpan {
+						spans[iSpan].Offset += offset + int32(nEmpty)
+					}
 					continue
 				}
 				spans[iSpan].Length -= uint32(nEmpty)
diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go
index cc8956a9d6..d4e9f8cc55 100644
--- a/model/histogram/float_histogram_test.go
+++ b/model/histogram/float_histogram_test.go
@@ -794,7 +794,7 @@ func TestFloatHistogramCompact(t *testing.T) {
 			},
 		},
 		{
-			"cut empty buckets at start or end of chunks, even in the middle",
+			"cut empty buckets at start or end of spans, even in the middle",
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-4, 6}, {3, 6}},
 				PositiveBuckets: []float64{0, 0, 1, 3.3, 0, 0, 4.2, 0.1, 3.3, 0, 0, 0},
@@ -826,7 +826,7 @@ func TestFloatHistogramCompact(t *testing.T) {
 			},
 		},
 		{
-			"cut empty buckets from the middle of a chunk",
+			"cut empty buckets from the middle of a span",
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-4, 6}, {3, 3}},
 				PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3},
@@ -842,7 +842,19 @@ func TestFloatHistogramCompact(t *testing.T) {
 			},
 		},
 		{
-			"cut empty buckets from the middle of a chunk, avoiding some due to maxEmptyBuckets",
+			"cut out a span containing only empty buckets",
+			&FloatHistogram{
+				PositiveSpans:   []Span{{-4, 3}, {2, 2}, {3, 4}},
+				PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3},
+			},
+			0,
+			&FloatHistogram{
+				PositiveSpans:   []Span{{-2, 1}, {7, 4}},
+				PositiveBuckets: []float64{1, 3.3, 4.2, 0.1, 3.3},
+			},
+		},
+		{
+			"cut empty buckets from the middle of a span, avoiding some due to maxEmptyBuckets",
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-4, 6}, {3, 3}},
 				PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3},
@@ -905,6 +917,22 @@ func TestFloatHistogramCompact(t *testing.T) {
 				NegativeBuckets: []float64{},
 			},
 		},
+		{
+			"multiple spans of only empty buckets",
+			&FloatHistogram{
+				PositiveSpans:   []Span{{-10, 2}, {2, 1}, {3, 3}},
+				PositiveBuckets: []float64{0, 0, 0, 0, 2, 3},
+				NegativeSpans:   []Span{{-10, 2}, {2, 1}, {3, 3}},
+				NegativeBuckets: []float64{2, 3, 0, 0, 0, 0},
+			},
+			0,
+			&FloatHistogram{
+				PositiveSpans:   []Span{{-1, 2}},
+				PositiveBuckets: []float64{2, 3},
+				NegativeSpans:   []Span{{-10, 2}},
+				NegativeBuckets: []float64{2, 3},
+			},
+		},
 	}
 
 	for _, c := range cases {