From d35d324ad0407062630bf4cc292c1ec60ec352eb Mon Sep 17 00:00:00 2001
From: Michael Hoffmann
Date: Fri, 28 Feb 2025 20:21:40 +0000
Subject: [PATCH] promql: histogram_fraction for bucket histograms

This PR extends the histogram_fraction function to also work with classic bucket histograms.
This is beneficial because it allows expressions like sum(increase(my_bucket{le="0.5"}[10m]))/sum(increase(my_total[10m])) to be written without
knowing the actual values of the "le" label, easing the transition to native histograms later on.
It also feels natural since histogram_quantile also can deal with classic histograms.

Signed-off-by: Michael Hoffmann
---
 promql/functions.go                        | 74 ++++++++++++++++-
 promql/promqltest/testdata/histograms.test |  5 ++
 promql/quantile.go                         | 89 ++++++++++++++++++++++
 3 files changed, 164 insertions(+), 4 deletions(-)

diff --git a/promql/functions.go b/promql/functions.go
index 3f977af56a..77100c032a 100644
--- a/promql/functions.go
+++ b/promql/functions.go
@@ -1433,16 +1433,65 @@ func funcHistogramStdVar(vals []parser.Value, _ parser.Expressions, enh *EvalNod
 }
 
 // === histogram_fraction(lower, upper parser.ValueTypeScalar, Vector parser.ValueTypeVector) (Vector, Annotations) ===
-func funcHistogramFraction(vals []parser.Value, _ parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
+func funcHistogramFraction(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	lower := vals[0].(Vector)[0].F
 	upper := vals[1].(Vector)[0].F
 	inVec := vals[2].(Vector)
 
+	if enh.signatureToMetricWithBuckets == nil {
+		enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
+	} else {
+		for _, v := range enh.signatureToMetricWithBuckets {
+			v.buckets = v.buckets[:0]
+		}
+	}
+
+	var (
+		annos            annotations.Annotations
+		histogramSamples []Sample
+	)
+
 	for _, sample := range inVec {
-		// Skip non-histogram samples.
-		if sample.H == nil {
+		// We are only looking for classic buckets here. Remember
+		// the histograms for later treatment.
+		if sample.H != nil {
+			histogramSamples = append(histogramSamples, sample)
 			continue
 		}
+
+		upperBound, err := strconv.ParseFloat(
+			sample.Metric.Get(model.BucketLabel), 64,
+		)
+		if err != nil {
+			annos.Add(annotations.NewBadBucketLabelWarning(sample.Metric.Get(labels.MetricName), sample.Metric.Get(model.BucketLabel), args[2].PositionRange()))
+			continue
+		}
+		enh.lblBuf = sample.Metric.BytesWithoutLabels(enh.lblBuf, labels.BucketLabel)
+		mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]
+		if !ok {
+			sample.Metric = labels.NewBuilder(sample.Metric).
+				Del(excludedLabels...).
+				Labels()
+			mb = &metricWithBuckets{sample.Metric, nil}
+			enh.signatureToMetricWithBuckets[string(enh.lblBuf)] = mb
+		}
+		mb.buckets = append(mb.buckets, Bucket{upperBound, sample.F})
+	}
+
+	// Now deal with the native histograms.
+	for _, sample := range histogramSamples {
+		// We have to reconstruct the exact same signature as above for
+		// a classic histogram, just ignoring any le label.
+		enh.lblBuf = sample.Metric.Bytes(enh.lblBuf)
+		if mb, ok := enh.signatureToMetricWithBuckets[string(enh.lblBuf)]; ok && len(mb.buckets) > 0 {
+			// At this data point, we have classic histogram
+			// buckets and a native histogram with the same name and
+			// labels. Do not evaluate anything.
+			annos.Add(annotations.NewMixedClassicNativeHistogramsWarning(sample.Metric.Get(labels.MetricName), args[2].PositionRange()))
+			delete(enh.signatureToMetricWithBuckets, string(enh.lblBuf))
+			continue
+		}
+
 		if !enh.enableDelayedNameRemoval {
 			sample.Metric = sample.Metric.DropMetricName()
 		}
@@ -1452,6 +1501,23 @@ func funcHistogramFraction(vals []parser.Value, _ parser.Expressions, enh *EvalN
 			DropName: true,
 		})
 	}
+
+	// Now do classic histograms that have already been filtered for conflicting native histograms.
+	for _, mb := range enh.signatureToMetricWithBuckets {
+		if len(mb.buckets) == 0 {
+			continue
+		}
+		if !enh.enableDelayedNameRemoval {
+			mb.metric = mb.metric.DropMetricName()
+		}
+
+		enh.Out = append(enh.Out, Sample{
+			Metric:   mb.metric,
+			F:        BucketFraction(lower, upper, mb.buckets),
+			DropName: true,
+		})
+	}
+
-	return enh.Out, nil
+	return enh.Out, annos
 }
 
diff --git a/promql/promqltest/testdata/histograms.test b/promql/promqltest/testdata/histograms.test
index 8ab23640af..87c16182fd 100644
--- a/promql/promqltest/testdata/histograms.test
+++ b/promql/promqltest/testdata/histograms.test
@@ -113,6 +113,11 @@
 eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3[10m]))
 	{start="positive"} 0.6363636363636364
 	{start="negative"} 0
+
+eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3_bucket[10m]))
+	{start="positive"} 0.6363636363636364
+	{start="negative"} 0
+
 
 # In the classic histogram, we can access the corresponding bucket (if
 # it exists) and divide by the count to get the same result.
diff --git a/promql/quantile.go b/promql/quantile.go
index f3af82487c..4f30d0e569 100644
--- a/promql/quantile.go
+++ b/promql/quantile.go
@@ -448,6 +448,95 @@ func HistogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6
 	return (upperRank - lowerRank) / h.Count
 }
 
+// BucketFraction is a version of HistogramFraction for classic histograms.
+//
+// The bucket counts are treated as cumulative, and the bucket with the
+// highest upper bound must be the +Inf bucket. The buckets slice is sorted
+// in place. The result is NaN if there are no buckets, if the +Inf bucket
+// is missing, if the total count is zero, or if lower or upper is NaN. It
+// is 0 if lower >= upper.
+func BucketFraction(lower, upper float64, buckets Buckets) float64 {
+	if len(buckets) == 0 {
+		return math.NaN()
+	}
+	slices.SortFunc(buckets, func(a, b Bucket) int {
+		// We don't expect the bucket boundary to be a NaN.
+		if a.UpperBound < b.UpperBound {
+			return -1
+		}
+		if a.UpperBound > b.UpperBound {
+			return +1
+		}
+		return 0
+	})
+	if !math.IsInf(buckets[len(buckets)-1].UpperBound, +1) {
+		return math.NaN()
+	}
+	buckets = coalesceBuckets(buckets)
+
+	count := buckets[len(buckets)-1].Count
+	if count == 0 || math.IsNaN(lower) || math.IsNaN(upper) {
+		return math.NaN()
+	}
+	if lower >= upper {
+		return 0
+	}
+
+	var (
+		rank, lowerRank, upperRank float64
+		lowerSet, upperSet         bool
+	)
+	for i, b := range buckets {
+		lowerBound := math.Inf(-1)
+		if i > 0 {
+			lowerBound = buckets[i-1].UpperBound
+		}
+		upperBound := b.UpperBound
+
+		interpolateLinearly := func(v float64) float64 {
+			// Bucket counts are cumulative, so only b.Count-rank
+			// observations fall into this bucket.
+			return rank + (b.Count-rank)*(v-lowerBound)/(upperBound-lowerBound)
+		}
+
+		if !lowerSet && lowerBound >= lower {
+			// We have hit the lower value at the lower bucket boundary.
+			lowerRank = rank
+			lowerSet = true
+		}
+		if !upperSet && lowerBound >= upper {
+			// We have hit the upper value at the lower bucket boundary.
+			upperRank = rank
+			upperSet = true
+		}
+		if lowerSet && upperSet {
+			break
+		}
+		if !lowerSet && lowerBound < lower && upperBound > lower {
+			// The lower value is in this bucket.
+			lowerRank = interpolateLinearly(lower)
+			lowerSet = true
+		}
+		if !upperSet && lowerBound < upper && upperBound > upper {
+			// The upper value is in this bucket.
+			upperRank = interpolateLinearly(upper)
+			upperSet = true
+		}
+		if lowerSet && upperSet {
+			break
+		}
+		rank = b.Count
+	}
+	if !lowerSet || lowerRank > count {
+		lowerRank = count
+	}
+	if !upperSet || upperRank > count {
+		upperRank = count
+	}
+
+	return (upperRank - lowerRank) / count
+}
+
 // coalesceBuckets merges buckets with the same upper bound.
 //
 // The input buckets must be sorted.