From d35d324ad0407062630bf4cc292c1ec60ec352eb Mon Sep 17 00:00:00 2001
From: Michael Hoffmann <mhoffmann@cloudflare.com>
Date: Fri, 28 Feb 2025 20:21:40 +0000
Subject: [PATCH] promql: histogram_fraction for bucket histograms

This PR extends the histogram_fraction function to also work with classic bucket histograms. This is beneficial because it allows expressions like sum(increase(my_bucket{le="0.5"}[10m]))/sum(increase(my_total[10m])) to be written without knowing the actual values of the "le" label, easing the transition to native histograms later on.
It also feels natural since histogram_quantile also can deal with classic histograms.

Signed-off-by: Michael Hoffmann <mhoffmann@cloudflare.com>
---
 promql/functions.go                        | 72 +++++++++++++++++++-
 promql/promqltest/testdata/histograms.test |  5 ++
 promql/quantile.go                         | 78 ++++++++++++++++++++++
 3 files changed, 152 insertions(+), 3 deletions(-)

diff --git a/promql/functions.go b/promql/functions.go
index 3f977af56a..77100c032a 100644
--- a/promql/functions.go
+++ b/promql/functions.go
@@ -1433,16 +1433,65 @@ func funcHistogramStdVar(vals []parser.Value, _ parser.Expressions, enh *EvalNod
 }
 
 // === histogram_fraction(lower, upper parser.ValueTypeScalar, Vector parser.ValueTypeVector) (Vector, Annotations) ===
-func funcHistogramFraction(vals []parser.Value, _ parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
+func funcHistogramFraction(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	lower := vals[0].(Vector)[0].F
 	upper := vals[1].(Vector)[0].F
 	inVec := vals[2].(Vector)
 
+	if enh.signatureToMetricWithBuckets == nil {
+		enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
+	} else {
+		for _, v := range enh.signatureToMetricWithBuckets {
+			v.buckets = v.buckets[:0]
+		}
+	}
+
+	var (
+		annos            annotations.Annotations
+		histogramSamples []Sample
+	)
+
 	for _, sample := range inVec {
-		// Skip non-histogram samples.
-		if sample.H == nil {
+		// We are only looking for classic buckets here. Remember
+		// the histograms for later treatment.
+		if sample.H != nil {
+			histogramSamples = append(histogramSamples, sample)
 			continue
 		}
+
+		upperBound, err := strconv.ParseFloat(
+			sample.Metric.Get(model.BucketLabel), 64,
+		)
+		if err != nil {
+			annos.Add(annotations.NewBadBucketLabelWarning(sample.Metric.Get(labels.MetricName), sample.Metric.Get(model.BucketLabel), args[2].PositionRange()))
+			continue
+		}
+		enh.lblBuf = sample.Metric.BytesWithoutLabels(enh.lblBuf, labels.BucketLabel)
+		mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]
+		if !ok {
+			sample.Metric = labels.NewBuilder(sample.Metric).
+				Del(excludedLabels...).
+				Labels()
+			mb = &metricWithBuckets{sample.Metric, nil}
+			enh.signatureToMetricWithBuckets[string(enh.lblBuf)] = mb
+		}
+		mb.buckets = append(mb.buckets, Bucket{upperBound, sample.F})
+	}
+
+	// Now deal with the native histograms.
+	for _, sample := range histogramSamples {
+		// We have to reconstruct the exact same signature as above for
+		// a classic histogram, just ignoring any le label.
+		enh.lblBuf = sample.Metric.Bytes(enh.lblBuf)
+		if mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]; ok && len(mb.buckets) > 0 {
+			// At this data point, we have classic histogram
+			// buckets and a native histogram with the same name and
+			// labels. Do not evaluate anything.
+			annos.Add(annotations.NewMixedClassicNativeHistogramsWarning(sample.Metric.Get(labels.MetricName), args[1].PositionRange()))
+			delete(enh.signatureToMetricWithBuckets, string(enh.lblBuf))
+			continue
+		}
+
 		if !enh.enableDelayedNameRemoval {
 			sample.Metric = sample.Metric.DropMetricName()
 		}
@@ -1452,6 +1501,23 @@ func funcHistogramFraction(vals []parser.Value, _ parser.Expressions, enh *EvalN
 			DropName: true,
 		})
 	}
+
+	// Now do classic histograms that have already been filtered for conflicting native histograms.
+	for _, mb := range enh.signatureToMetricWithBuckets {
+		if len(mb.buckets) == 0 {
+			continue
+		}
+		if !enh.enableDelayedNameRemoval {
+			mb.metric = mb.metric.DropMetricName()
+		}
+
+		enh.Out = append(enh.Out, Sample{
+			Metric:   mb.metric,
+			F:        BucketFraction(lower, upper, mb.buckets),
+			DropName: true,
+		})
+	}
+
 	return enh.Out, nil
 }
 
diff --git a/promql/promqltest/testdata/histograms.test b/promql/promqltest/testdata/histograms.test
index 8ab23640af..87c16182fd 100644
--- a/promql/promqltest/testdata/histograms.test
+++ b/promql/promqltest/testdata/histograms.test
@@ -113,6 +113,11 @@ eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3[10m]))
 	{start="positive"} 0.6363636363636364
 	{start="negative"} 0
 
+
+eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3_bucket[10m]))
+	{start="positive"} 0.6363636363636364
+	{start="negative"} 0
+
 # In the classic histogram, we can access the corresponding bucket (if
 # it exists) and divide by the count to get the same result.
 
diff --git a/promql/quantile.go b/promql/quantile.go
index f3af82487c..4f30d0e569 100644
--- a/promql/quantile.go
+++ b/promql/quantile.go
@@ -448,6 +448,84 @@ func HistogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6
 	return (upperRank - lowerRank) / h.Count
 }
 
+// BucketFraction is a version of HistogramFraction for classic histograms.
+func BucketFraction(lower, upper float64, buckets Buckets) float64 {
+	slices.SortFunc(buckets, func(a, b Bucket) int {
+		// We don't expect the bucket boundary to be a NaN.
+		if a.UpperBound < b.UpperBound {
+			return -1
+		}
+		if a.UpperBound > b.UpperBound {
+			return +1
+		}
+		return 0
+	})
+	if !math.IsInf(buckets[len(buckets)-1].UpperBound, +1) {
+		return math.NaN()
+	}
+	buckets = coalesceBuckets(buckets)
+
+	count := buckets[len(buckets)-1].Count
+	if count == 0 || math.IsNaN(lower) || math.IsNaN(upper) {
+		return math.NaN()
+	}
+	if lower >= upper {
+		return 0
+	}
+
+	var (
+		rank, lowerRank, upperRank float64
+		lowerSet, upperSet         bool
+	)
+	for i, b := range buckets {
+		lowerBound := math.Inf(-1)
+		if i > 0 {
+			lowerBound = buckets[i-1].UpperBound
+		}
+		upperBound := b.UpperBound
+
+		interpolateLinearly := func(v float64) float64 {
+			return rank + b.Count*(v-lowerBound)/(upperBound-lowerBound)
+		}
+
+		if !lowerSet && lowerBound >= lower {
+			// We have hit the lower value at the lower bucket boundary.
+			lowerRank = rank
+			lowerSet = true
+		}
+		if !upperSet && lowerBound >= upper {
+			// We have hit the upper value at the lower bucket boundary.
+			upperRank = rank
+			upperSet = true
+		}
+		if lowerSet && upperSet {
+			break
+		}
+		if !lowerSet && lowerBound < lower && upperBound > lower {
+			// The lower value is in this bucket.
+			lowerRank = interpolateLinearly(lower)
+			lowerSet = true
+		}
+		if !upperSet && lowerBound < upper && upperBound > upper {
+			// The upper value is in this bucket.
+			upperRank = interpolateLinearly(upper)
+			upperSet = true
+		}
+		if lowerSet && upperSet {
+			break
+		}
+		rank = b.Count
+	}
+	if !lowerSet || lowerRank > count {
+		lowerRank = count
+	}
+	if !upperSet || upperRank > count {
+		upperRank = count
+	}
+
+	return (upperRank - lowerRank) / count
+}
+
 // coalesceBuckets merges buckets with the same upper bound.
 //
 // The input buckets must be sorted.