Merge pull request #10564 from prometheus/beorn7/sparsehistogram

Histogram: Fix and simplify histogram_quantile
2025-03-05 20:59:13 -08:00 · 2022-04-12 15:23:04 +02:00 · 2022-04-12 15:23:04 +02:00 · 3966f9238c
parent f009e1c8f8 56db51c826
commit 3966f9238c
5 changed files with 73 additions and 67 deletions
--- a/model/histogram/float_histogram.go
+++ b/model/histogram/float_histogram.go
@ -430,7 +430,11 @@ func compactBuckets(buckets []float64, spans []Span, maxEmptyBuckets int) ([]flo
 				// Start of span.
 				if nEmpty == int(spans[iSpan].Length) {
 					// The whole span is empty.
 					offset := spans[iSpan].Offset
 					spans = append(spans[:iSpan], spans[iSpan+1:]...)
 					if len(spans) > iSpan {
 						spans[iSpan].Offset += offset + int32(nEmpty)
 					}
 					continue
 				}
 				spans[iSpan].Length -= uint32(nEmpty)
--- a/model/histogram/float_histogram_test.go
+++ b/model/histogram/float_histogram_test.go
@ -794,7 +794,7 @@ func TestFloatHistogramCompact(t *testing.T) {
 			},
 		},
 		{
-			"cut empty buckets at start or end of chunks, even in the middle",
+			"cut empty buckets at start or end of spans, even in the middle",
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-4, 6}, {3, 6}},
 				PositiveBuckets: []float64{0, 0, 1, 3.3, 0, 0, 4.2, 0.1, 3.3, 0, 0, 0},
@ -826,7 +826,7 @@ func TestFloatHistogramCompact(t *testing.T) {
 			},
 		},
 		{
-			"cut empty buckets from the middle of a chunk",
+			"cut empty buckets from the middle of a span",
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-4, 6}, {3, 3}},
 				PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3},
@ -842,7 +842,19 @@ func TestFloatHistogramCompact(t *testing.T) {
 			},
 		},
 		{
-			"cut empty buckets from the middle of a chunk, avoiding some due to maxEmptyBuckets",
+			"cut out a span containing only empty buckets",
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-4, 3}, {2, 2}, {3, 4}},
 				PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3},
 			},
 			0,
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-2, 1}, {7, 4}},
 				PositiveBuckets: []float64{1, 3.3, 4.2, 0.1, 3.3},
 			},
 		},
 		{
 			"cut empty buckets from the middle of a span, avoiding some due to maxEmptyBuckets",
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-4, 6}, {3, 3}},
 				PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3},
@ -905,6 +917,22 @@ func TestFloatHistogramCompact(t *testing.T) {
 				NegativeBuckets: []float64{},
 			},
 		},
 		{
 			"multiple spans of only empty buckets",
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-10, 2}, {2, 1}, {3, 3}},
 				PositiveBuckets: []float64{0, 0, 0, 0, 2, 3},
 				NegativeSpans:   []Span{{-10, 2}, {2, 1}, {3, 3}},
 				NegativeBuckets: []float64{2, 3, 0, 0, 0, 0},
 			},
 			0,
 			&FloatHistogram{
 				PositiveSpans:   []Span{{-1, 2}},
 				PositiveBuckets: []float64{2, 3},
 				NegativeSpans:   []Span{{-10, 2}},
 				NegativeBuckets: []float64{2, 3},
 			},
 		},
 	}
 	for _, c := range cases {
--- a/promql/engine.go
+++ b/promql/engine.go
@ -989,8 +989,6 @@ type EvalNodeHelper struct {
 	Dmn map[uint64]labels.Labels
 	// funcHistogramQuantile for conventional histograms.
 	signatureToMetricWithBuckets map[string]*metricWithBuckets
 	// funcHistogramQuantile for the new histograms.
 	signatureToMetricWithHistograms map[string]*metricWithHistograms
 	// label_replace.
 	regex *regexp.Regexp
--- a/promql/functions.go
+++ b/promql/functions.go
@ -869,7 +869,6 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 	q := vals[0].(Vector)[0].V
 	inVec := vals[1].(Vector)
 	sigf := signatureFunc(false, enh.lblBuf, labels.BucketLabel)
 	ignoreSignature := make(map[string]bool) // For signatures having both new and old histograms.
 	if enh.signatureToMetricWithBuckets == nil {
 		enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
@ -878,45 +877,19 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 			v.buckets = v.buckets[:0]
 		}
 	}
 	if enh.signatureToMetricWithHistograms == nil {
 		enh.signatureToMetricWithHistograms = map[string]*metricWithHistograms{}
 	} else {
 		for _, v := range enh.signatureToMetricWithHistograms {
 			v.histogram = nil
 		}
 	}
 	for _, el := range inVec {
 		l := sigf(el.Metric)
 		if ignoreSignature[l] {
 			continue
 		}
-		if el.H != nil { // It's a histogram type.
+	var histogramSamples []Sample
 			_, ok := enh.signatureToMetricWithBuckets[l]
 			if ok {
 				// This signature exists for both conventional
 				// and new histograms, which is not supported.
 				// TODO(beorn7): Issue a warning somehow.
 				delete(enh.signatureToMetricWithBuckets, l)
 				delete(enh.signatureToMetricWithHistograms, l)
 				ignoreSignature[l] = true
 				continue
 			}
-			_, ok = enh.signatureToMetricWithHistograms[l]
+	for _, sample := range inVec {
-			if ok {
+		// We are only looking for conventional buckets here. Remember
-				panic(errors.New("histogram_quantile: vector cannot contain metrics with the same labelset"))
+		// the histograms for later treatment.
-			}
+		if sample.H != nil {
-			el.Metric = labels.NewBuilder(el.Metric).
+			histogramSamples = append(histogramSamples, sample)
 				Del(labels.BucketLabel, labels.MetricName).
 				Labels()
 			enh.signatureToMetricWithHistograms[l] = &metricWithHistograms{el.Metric, el.H}
 			continue
 		}
 		upperBound, err := strconv.ParseFloat(
-			el.Metric.Get(model.BucketLabel), 64,
+			sample.Metric.Get(model.BucketLabel), 64,
 		)
 		if err != nil {
 			// Oops, no bucket label or malformed label value. Skip.
@ -924,30 +897,47 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 			continue
 		}
-		_, ok := enh.signatureToMetricWithHistograms[l]
+		l := sigf(sample.Metric)
 		if ok {
 			// This signature exists for both conventional and new
 			// histograms, which is not supported.
 			// TODO(beorn7): Issue a warning somehow.
 			delete(enh.signatureToMetricWithBuckets, l)
 			delete(enh.signatureToMetricWithHistograms, l)
 			ignoreSignature[l] = true
 			continue
 		}
 		// Add the metric name (which is always removed) to the signature to prevent combining multiple histograms
 		// with the same label set. See https://github.com/prometheus/prometheus/issues/9910
-		l = l + el.Metric.Get(model.MetricNameLabel)
+		// TODO(beorn7): What's the performance impact? We might need to implement this more efficiently.
 		l = l + sample.Metric.Get(model.MetricNameLabel)
 		mb, ok := enh.signatureToMetricWithBuckets[l]
 		if !ok {
-			el.Metric = labels.NewBuilder(el.Metric).
+			sample.Metric = labels.NewBuilder(sample.Metric).
 				Del(excludedLabels...).
 				Labels()
-			mb = &metricWithBuckets{el.Metric, nil}
+			mb = &metricWithBuckets{sample.Metric, nil}
 			enh.signatureToMetricWithBuckets[l] = mb
 		}
-		mb.buckets = append(mb.buckets, bucket{upperBound, el.V})
+		mb.buckets = append(mb.buckets, bucket{upperBound, sample.V})
 	}
 	// Now deal with the histograms.
 	for _, sample := range histogramSamples {
 		// We have to reconstruct the exact same signature as above for
 		// a conventional histogram, just ignoring any le label.
 		// TODO(beorn7): This replicates the inefficient implementation
 		// from above and has to be improved under the same
 		// circumstances.
 		l := string(sample.Metric.WithoutLabels().Bytes(enh.lblBuf))
 		l = l + sample.Metric.Get(model.MetricNameLabel)
 		if mb, ok := enh.signatureToMetricWithBuckets[l]; ok && len(mb.buckets) > 0 {
 			// At this data point, we have conventional histogram
 			// buckets and a native histogram with the same name and
 			// labels. Do not evaluate anything.
 			// TODO(beorn7): Issue a warning somehow.
 			delete(enh.signatureToMetricWithBuckets, l)
 			continue
 		}
 		enh.Out = append(enh.Out, Sample{
 			Metric: enh.DropMetricName(sample.Metric),
 			Point:  Point{V: histogramQuantile(q, sample.H)},
 		})
 	}
 	for _, mb := range enh.signatureToMetricWithBuckets {
@ -959,15 +949,6 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 		}
 	}
 	for _, mh := range enh.signatureToMetricWithHistograms {
 		if mh.histogram != nil {
 			enh.Out = append(enh.Out, Sample{
 				Metric: mh.metric,
 				Point:  Point{V: histogramQuantile(q, mh.histogram)},
 			})
 		}
 	}
 	return enh.Out
 }
--- a/promql/quantile.go
+++ b/promql/quantile.go
@ -47,11 +47,6 @@ type metricWithBuckets struct {
 	buckets buckets
 }
 type metricWithHistograms struct {
 	metric    labels.Labels
 	histogram *histogram.FloatHistogram
 }
 // bucketQuantile calculates the quantile 'q' based on the given buckets. The
 // buckets will be sorted by upperBound by this function (i.e. no sorting
 // needed before calling this function). The quantile value is interpolated