Merge pull request #10564 from prometheus/beorn7/sparsehistogram

Histogram: Fix and simplify histogram_quantile
2025-03-05 20:59:13 -08:00 · 2022-04-12 15:23:04 +02:00 · 2022-04-12 15:23:04 +02:00 · 3966f9238c
parent f009e1c8f8 56db51c826
commit 3966f9238c
5 changed files with 73 additions and 67 deletions
--- a/model/histogram/float_histogram.go
+++ b/model/histogram/float_histogram.go
@ -430,7 +430,11 @@ func compactBuckets(buckets []float64, spans []Span, maxEmptyBuckets int) ([]flo
 				// Start of span.
 				if nEmpty == int(spans[iSpan].Length) {
 					// The whole span is empty.
+					offset := spans[iSpan].Offset
 					spans = append(spans[:iSpan], spans[iSpan+1:]...)
+					if len(spans) > iSpan {
+						spans[iSpan].Offset += offset + int32(nEmpty)
+					}
 					continue
 				}
 				spans[iSpan].Length -= uint32(nEmpty)
--- a/model/histogram/float_histogram_test.go
+++ b/model/histogram/float_histogram_test.go
@ -794,7 +794,7 @@ func TestFloatHistogramCompact(t *testing.T) {
 			},
 		},
 		{
-			"cut empty buckets at start or end of chunks, even in the middle",
+			"cut empty buckets at start or end of spans, even in the middle",
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-4, 6}, {3, 6}},
 				PositiveBuckets: []float64{0, 0, 1, 3.3, 0, 0, 4.2, 0.1, 3.3, 0, 0, 0},
@ -826,7 +826,7 @@ func TestFloatHistogramCompact(t *testing.T) {
 			},
 		},
 		{
-			"cut empty buckets from the middle of a chunk",
+			"cut empty buckets from the middle of a span",
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-4, 6}, {3, 3}},
 				PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3},
@ -842,7 +842,19 @@ func TestFloatHistogramCompact(t *testing.T) {
 			},
 		},
 		{
-			"cut empty buckets from the middle of a chunk, avoiding some due to maxEmptyBuckets",
+			"cut out a span containing only empty buckets",
+			&FloatHistogram{
+				PositiveSpans:   []Span{{-4, 3}, {2, 2}, {3, 4}},
+				PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3},
+			},
+			0,
+			&FloatHistogram{
+				PositiveSpans:   []Span{{-2, 1}, {7, 4}},
+				PositiveBuckets: []float64{1, 3.3, 4.2, 0.1, 3.3},
+			},
+		},
+		{
+			"cut empty buckets from the middle of a span, avoiding some due to maxEmptyBuckets",
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-4, 6}, {3, 3}},
 				PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3},
@ -905,6 +917,22 @@ func TestFloatHistogramCompact(t *testing.T) {
 				NegativeBuckets: []float64{},
 			},
 		},
+		{
+			"multiple spans of only empty buckets",
+			&FloatHistogram{
+				PositiveSpans:   []Span{{-10, 2}, {2, 1}, {3, 3}},
+				PositiveBuckets: []float64{0, 0, 0, 0, 2, 3},
+				NegativeSpans:   []Span{{-10, 2}, {2, 1}, {3, 3}},
+				NegativeBuckets: []float64{2, 3, 0, 0, 0, 0},
+			},
+			0,
+			&FloatHistogram{
+				PositiveSpans:   []Span{{-1, 2}},
+				PositiveBuckets: []float64{2, 3},
+				NegativeSpans:   []Span{{-10, 2}},
+				NegativeBuckets: []float64{2, 3},
+			},
+		},
 	}

 	for _, c := range cases {
--- a/promql/engine.go
+++ b/promql/engine.go
@ -989,8 +989,6 @@ type EvalNodeHelper struct {
 	Dmn map[uint64]labels.Labels
 	// funcHistogramQuantile for conventional histograms.
 	signatureToMetricWithBuckets map[string]*metricWithBuckets
-	// funcHistogramQuantile for the new histograms.
-	signatureToMetricWithHistograms map[string]*metricWithHistograms
 	// label_replace.
 	regex *regexp.Regexp

--- a/promql/functions.go
+++ b/promql/functions.go
@ -869,7 +869,6 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 	q := vals[0].(Vector)[0].V
 	inVec := vals[1].(Vector)
 	sigf := signatureFunc(false, enh.lblBuf, labels.BucketLabel)
-	ignoreSignature := make(map[string]bool) // For signatures having both new and old histograms.

 	if enh.signatureToMetricWithBuckets == nil {
 		enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
@ -878,45 +877,19 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 			v.buckets = v.buckets[:0]
 		}
 	}
-	if enh.signatureToMetricWithHistograms == nil {
-		enh.signatureToMetricWithHistograms = map[string]*metricWithHistograms{}
-	} else {
-		for _, v := range enh.signatureToMetricWithHistograms {
-			v.histogram = nil
-		}
-	}
-	for _, el := range inVec {
-		l := sigf(el.Metric)
-		if ignoreSignature[l] {
-			continue
-		}

-		if el.H != nil { // It's a histogram type.
-			_, ok := enh.signatureToMetricWithBuckets[l]
-			if ok {
-				// This signature exists for both conventional
-				// and new histograms, which is not supported.
-				// TODO(beorn7): Issue a warning somehow.
-				delete(enh.signatureToMetricWithBuckets, l)
-				delete(enh.signatureToMetricWithHistograms, l)
-				ignoreSignature[l] = true
-				continue
-			}
+	var histogramSamples []Sample

-			_, ok = enh.signatureToMetricWithHistograms[l]
-			if ok {
-				panic(errors.New("histogram_quantile: vector cannot contain metrics with the same labelset"))
-			}
-			el.Metric = labels.NewBuilder(el.Metric).
-				Del(labels.BucketLabel, labels.MetricName).
-				Labels()
-
-			enh.signatureToMetricWithHistograms[l] = &metricWithHistograms{el.Metric, el.H}
+	for _, sample := range inVec {
+		// We are only looking for conventional buckets here. Remember
+		// the histograms for later treatment.
+		if sample.H != nil {
+			histogramSamples = append(histogramSamples, sample)
 			continue
 		}

 		upperBound, err := strconv.ParseFloat(
-			el.Metric.Get(model.BucketLabel), 64,
+			sample.Metric.Get(model.BucketLabel), 64,
 		)
 		if err != nil {
 			// Oops, no bucket label or malformed label value. Skip.
@ -924,30 +897,47 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 			continue
 		}

-		_, ok := enh.signatureToMetricWithHistograms[l]
-		if ok {
-			// This signature exists for both conventional and new
-			// histograms, which is not supported.
-			// TODO(beorn7): Issue a warning somehow.
-			delete(enh.signatureToMetricWithBuckets, l)
-			delete(enh.signatureToMetricWithHistograms, l)
-			ignoreSignature[l] = true
-			continue
-		}
+		l := sigf(sample.Metric)
 		// Add the metric name (which is always removed) to the signature to prevent combining multiple histograms
 		// with the same label set. See https://github.com/prometheus/prometheus/issues/9910
-		l = l + el.Metric.Get(model.MetricNameLabel)
+		// TODO(beorn7): What's the performance impact? We might need to implement this more efficiently.
+		l = l + sample.Metric.Get(model.MetricNameLabel)

 		mb, ok := enh.signatureToMetricWithBuckets[l]
 		if !ok {
-			el.Metric = labels.NewBuilder(el.Metric).
+			sample.Metric = labels.NewBuilder(sample.Metric).
 				Del(excludedLabels...).
 				Labels()

-			mb = &metricWithBuckets{el.Metric, nil}
+			mb = &metricWithBuckets{sample.Metric, nil}
 			enh.signatureToMetricWithBuckets[l] = mb
 		}
-		mb.buckets = append(mb.buckets, bucket{upperBound, el.V})
+		mb.buckets = append(mb.buckets, bucket{upperBound, sample.V})
+
+	}
+
+	// Now deal with the histograms.
+	for _, sample := range histogramSamples {
+		// We have to reconstruct the exact same signature as above for
+		// a conventional histogram, just ignoring any le label.
+		// TODO(beorn7): This replicates the inefficient implementation
+		// from above and has to be improved under the same
+		// circumstances.
+		l := string(sample.Metric.WithoutLabels().Bytes(enh.lblBuf))
+		l = l + sample.Metric.Get(model.MetricNameLabel)
+		if mb, ok := enh.signatureToMetricWithBuckets[l]; ok && len(mb.buckets) > 0 {
+			// At this data point, we have conventional histogram
+			// buckets and a native histogram with the same name and
+			// labels. Do not evaluate anything.
+			// TODO(beorn7): Issue a warning somehow.
+			delete(enh.signatureToMetricWithBuckets, l)
+			continue
+		}
+
+		enh.Out = append(enh.Out, Sample{
+			Metric: enh.DropMetricName(sample.Metric),
+			Point:  Point{V: histogramQuantile(q, sample.H)},
+		})
 	}

 	for _, mb := range enh.signatureToMetricWithBuckets {
@ -959,15 +949,6 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 		}
 	}

-	for _, mh := range enh.signatureToMetricWithHistograms {
-		if mh.histogram != nil {
-			enh.Out = append(enh.Out, Sample{
-				Metric: mh.metric,
-				Point:  Point{V: histogramQuantile(q, mh.histogram)},
-			})
-		}
-	}
-
 	return enh.Out
 }

--- a/promql/quantile.go
+++ b/promql/quantile.go
@ -47,11 +47,6 @@ type metricWithBuckets struct {
 	buckets buckets
 }

-type metricWithHistograms struct {
-	metric    labels.Labels
-	histogram *histogram.FloatHistogram
-}
-
 // bucketQuantile calculates the quantile 'q' based on the given buckets. The
 // buckets will be sorted by upperBound by this function (i.e. no sorting
 // needed before calling this function). The quantile value is interpolated