Merge pull request #10564 from prometheus/beorn7/sparsehistogram

Histogram: Fix and simplify histogram_quantile
This commit is contained in:
Björn Rabenstein 2022-04-12 15:23:04 +02:00 committed by GitHub
commit 3966f9238c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 73 additions and 67 deletions

View file

@ -430,7 +430,11 @@ func compactBuckets(buckets []float64, spans []Span, maxEmptyBuckets int) ([]flo
// Start of span. // Start of span.
if nEmpty == int(spans[iSpan].Length) { if nEmpty == int(spans[iSpan].Length) {
// The whole span is empty. // The whole span is empty.
offset := spans[iSpan].Offset
spans = append(spans[:iSpan], spans[iSpan+1:]...) spans = append(spans[:iSpan], spans[iSpan+1:]...)
if len(spans) > iSpan {
spans[iSpan].Offset += offset + int32(nEmpty)
}
continue continue
} }
spans[iSpan].Length -= uint32(nEmpty) spans[iSpan].Length -= uint32(nEmpty)

View file

@ -794,7 +794,7 @@ func TestFloatHistogramCompact(t *testing.T) {
}, },
}, },
{ {
"cut empty buckets at start or end of chunks, even in the middle", "cut empty buckets at start or end of spans, even in the middle",
&FloatHistogram{ &FloatHistogram{
PositiveSpans: []Span{{-4, 6}, {3, 6}}, PositiveSpans: []Span{{-4, 6}, {3, 6}},
PositiveBuckets: []float64{0, 0, 1, 3.3, 0, 0, 4.2, 0.1, 3.3, 0, 0, 0}, PositiveBuckets: []float64{0, 0, 1, 3.3, 0, 0, 4.2, 0.1, 3.3, 0, 0, 0},
@ -826,7 +826,7 @@ func TestFloatHistogramCompact(t *testing.T) {
}, },
}, },
{ {
"cut empty buckets from the middle of a chunk", "cut empty buckets from the middle of a span",
&FloatHistogram{ &FloatHistogram{
PositiveSpans: []Span{{-4, 6}, {3, 3}}, PositiveSpans: []Span{{-4, 6}, {3, 3}},
PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3}, PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3},
@ -842,7 +842,19 @@ func TestFloatHistogramCompact(t *testing.T) {
}, },
}, },
{ {
"cut empty buckets from the middle of a chunk, avoiding some due to maxEmptyBuckets", "cut out a span containing only empty buckets",
&FloatHistogram{
PositiveSpans: []Span{{-4, 3}, {2, 2}, {3, 4}},
PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3},
},
0,
&FloatHistogram{
PositiveSpans: []Span{{-2, 1}, {7, 4}},
PositiveBuckets: []float64{1, 3.3, 4.2, 0.1, 3.3},
},
},
{
"cut empty buckets from the middle of a span, avoiding some due to maxEmptyBuckets",
&FloatHistogram{ &FloatHistogram{
PositiveSpans: []Span{{-4, 6}, {3, 3}}, PositiveSpans: []Span{{-4, 6}, {3, 3}},
PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3}, PositiveBuckets: []float64{0, 0, 1, 0, 0, 3.3, 4.2, 0.1, 3.3},
@ -905,6 +917,22 @@ func TestFloatHistogramCompact(t *testing.T) {
NegativeBuckets: []float64{}, NegativeBuckets: []float64{},
}, },
}, },
{
"multiple spans of only empty buckets",
&FloatHistogram{
PositiveSpans: []Span{{-10, 2}, {2, 1}, {3, 3}},
PositiveBuckets: []float64{0, 0, 0, 0, 2, 3},
NegativeSpans: []Span{{-10, 2}, {2, 1}, {3, 3}},
NegativeBuckets: []float64{2, 3, 0, 0, 0, 0},
},
0,
&FloatHistogram{
PositiveSpans: []Span{{-1, 2}},
PositiveBuckets: []float64{2, 3},
NegativeSpans: []Span{{-10, 2}},
NegativeBuckets: []float64{2, 3},
},
},
} }
for _, c := range cases { for _, c := range cases {

View file

@ -989,8 +989,6 @@ type EvalNodeHelper struct {
Dmn map[uint64]labels.Labels Dmn map[uint64]labels.Labels
// funcHistogramQuantile for conventional histograms. // funcHistogramQuantile for conventional histograms.
signatureToMetricWithBuckets map[string]*metricWithBuckets signatureToMetricWithBuckets map[string]*metricWithBuckets
// funcHistogramQuantile for the new histograms.
signatureToMetricWithHistograms map[string]*metricWithHistograms
// label_replace. // label_replace.
regex *regexp.Regexp regex *regexp.Regexp

View file

@ -869,7 +869,6 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
q := vals[0].(Vector)[0].V q := vals[0].(Vector)[0].V
inVec := vals[1].(Vector) inVec := vals[1].(Vector)
sigf := signatureFunc(false, enh.lblBuf, labels.BucketLabel) sigf := signatureFunc(false, enh.lblBuf, labels.BucketLabel)
ignoreSignature := make(map[string]bool) // For signatures having both new and old histograms.
if enh.signatureToMetricWithBuckets == nil { if enh.signatureToMetricWithBuckets == nil {
enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{} enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{}
@ -878,45 +877,19 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
v.buckets = v.buckets[:0] v.buckets = v.buckets[:0]
} }
} }
if enh.signatureToMetricWithHistograms == nil {
enh.signatureToMetricWithHistograms = map[string]*metricWithHistograms{}
} else {
for _, v := range enh.signatureToMetricWithHistograms {
v.histogram = nil
}
}
for _, el := range inVec {
l := sigf(el.Metric)
if ignoreSignature[l] {
continue
}
if el.H != nil { // It's a histogram type. var histogramSamples []Sample
_, ok := enh.signatureToMetricWithBuckets[l]
if ok {
// This signature exists for both conventional
// and new histograms, which is not supported.
// TODO(beorn7): Issue a warning somehow.
delete(enh.signatureToMetricWithBuckets, l)
delete(enh.signatureToMetricWithHistograms, l)
ignoreSignature[l] = true
continue
}
_, ok = enh.signatureToMetricWithHistograms[l] for _, sample := range inVec {
if ok { // We are only looking for conventional buckets here. Remember
panic(errors.New("histogram_quantile: vector cannot contain metrics with the same labelset")) // the histograms for later treatment.
} if sample.H != nil {
el.Metric = labels.NewBuilder(el.Metric). histogramSamples = append(histogramSamples, sample)
Del(labels.BucketLabel, labels.MetricName).
Labels()
enh.signatureToMetricWithHistograms[l] = &metricWithHistograms{el.Metric, el.H}
continue continue
} }
upperBound, err := strconv.ParseFloat( upperBound, err := strconv.ParseFloat(
el.Metric.Get(model.BucketLabel), 64, sample.Metric.Get(model.BucketLabel), 64,
) )
if err != nil { if err != nil {
// Oops, no bucket label or malformed label value. Skip. // Oops, no bucket label or malformed label value. Skip.
@ -924,30 +897,47 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
continue continue
} }
_, ok := enh.signatureToMetricWithHistograms[l] l := sigf(sample.Metric)
if ok {
// This signature exists for both conventional and new
// histograms, which is not supported.
// TODO(beorn7): Issue a warning somehow.
delete(enh.signatureToMetricWithBuckets, l)
delete(enh.signatureToMetricWithHistograms, l)
ignoreSignature[l] = true
continue
}
// Add the metric name (which is always removed) to the signature to prevent combining multiple histograms // Add the metric name (which is always removed) to the signature to prevent combining multiple histograms
// with the same label set. See https://github.com/prometheus/prometheus/issues/9910 // with the same label set. See https://github.com/prometheus/prometheus/issues/9910
l = l + el.Metric.Get(model.MetricNameLabel) // TODO(beorn7): What's the performance impact? We might need to implement this more efficiently.
l = l + sample.Metric.Get(model.MetricNameLabel)
mb, ok := enh.signatureToMetricWithBuckets[l] mb, ok := enh.signatureToMetricWithBuckets[l]
if !ok { if !ok {
el.Metric = labels.NewBuilder(el.Metric). sample.Metric = labels.NewBuilder(sample.Metric).
Del(excludedLabels...). Del(excludedLabels...).
Labels() Labels()
mb = &metricWithBuckets{el.Metric, nil} mb = &metricWithBuckets{sample.Metric, nil}
enh.signatureToMetricWithBuckets[l] = mb enh.signatureToMetricWithBuckets[l] = mb
} }
mb.buckets = append(mb.buckets, bucket{upperBound, el.V}) mb.buckets = append(mb.buckets, bucket{upperBound, sample.V})
}
// Now deal with the histograms.
for _, sample := range histogramSamples {
// We have to reconstruct the exact same signature as above for
// a conventional histogram, just ignoring any le label.
// TODO(beorn7): This replicates the inefficient implementation
// from above and has to be improved under the same
// circumstances.
l := string(sample.Metric.WithoutLabels().Bytes(enh.lblBuf))
l = l + sample.Metric.Get(model.MetricNameLabel)
if mb, ok := enh.signatureToMetricWithBuckets[l]; ok && len(mb.buckets) > 0 {
// At this data point, we have conventional histogram
// buckets and a native histogram with the same name and
// labels. Do not evaluate anything.
// TODO(beorn7): Issue a warning somehow.
delete(enh.signatureToMetricWithBuckets, l)
continue
}
enh.Out = append(enh.Out, Sample{
Metric: enh.DropMetricName(sample.Metric),
Point: Point{V: histogramQuantile(q, sample.H)},
})
} }
for _, mb := range enh.signatureToMetricWithBuckets { for _, mb := range enh.signatureToMetricWithBuckets {
@ -959,15 +949,6 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
} }
} }
for _, mh := range enh.signatureToMetricWithHistograms {
if mh.histogram != nil {
enh.Out = append(enh.Out, Sample{
Metric: mh.metric,
Point: Point{V: histogramQuantile(q, mh.histogram)},
})
}
}
return enh.Out return enh.Out
} }

View file

@ -47,11 +47,6 @@ type metricWithBuckets struct {
buckets buckets buckets buckets
} }
type metricWithHistograms struct {
metric labels.Labels
histogram *histogram.FloatHistogram
}
// bucketQuantile calculates the quantile 'q' based on the given buckets. The // bucketQuantile calculates the quantile 'q' based on the given buckets. The
// buckets will be sorted by upperBound by this function (i.e. no sorting // buckets will be sorted by upperBound by this function (i.e. no sorting
// needed before calling this function). The quantile value is interpolated // needed before calling this function). The quantile value is interpolated