From feaa93da7796e3e8dd3e35ef96808a253982f296 Mon Sep 17 00:00:00 2001 From: Jeanette Tan Date: Wed, 4 Oct 2023 18:53:55 +0800 Subject: [PATCH 1/2] Add warning when monotonicity is forced in the input to histogram_quantile Signed-off-by: Jeanette Tan --- promql/functions.go | 6 +++++- promql/quantile.go | 32 +++++++++++++++++++------------- util/annotations/annotations.go | 18 ++++++++++++++---- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/promql/functions.go b/promql/functions.go index b1245f5a1..2a1a0b030 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -1168,10 +1168,14 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev for _, mb := range enh.signatureToMetricWithBuckets { if len(mb.buckets) > 0 { + res, forcedMonotonicity := bucketQuantile(q, mb.buckets) enh.Out = append(enh.Out, Sample{ Metric: mb.metric, - F: bucketQuantile(q, mb.buckets), + F: res, }) + if forcedMonotonicity { + annos.Add(annotations.NewHistogramQuantileForcedMonotonicityWarning(mb.metric.Get(labels.MetricName), args[1].PositionRange())) + } } } diff --git a/promql/quantile.go b/promql/quantile.go index 06a02a8d4..1ebc39aa2 100644 --- a/promql/quantile.go +++ b/promql/quantile.go @@ -71,15 +71,17 @@ type metricWithBuckets struct { // If q<0, -Inf is returned. // // If q>1, +Inf is returned. -func bucketQuantile(q float64, buckets buckets) float64 { +// +// We also return a bool to indicate if monotonicity needed to be forced. +func bucketQuantile(q float64, buckets buckets) (float64, bool) { if math.IsNaN(q) { - return math.NaN() + return math.NaN(), false } if q < 0 { - return math.Inf(-1) + return math.Inf(-1), false } if q > 1 { - return math.Inf(+1) + return math.Inf(+1), false } slices.SortFunc(buckets, func(a, b bucket) int { // We don't expect the bucket boundary to be a NaN. @@ -92,27 +94,27 @@ func bucketQuantile(q float64, buckets buckets) float64 { return 0 }) if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) { - return math.NaN() + return math.NaN(), false } buckets = coalesceBuckets(buckets) - ensureMonotonic(buckets) + forcedMonotonic := ensureMonotonic(buckets) if len(buckets) < 2 { - return math.NaN() + return math.NaN(), false } observations := buckets[len(buckets)-1].count if observations == 0 { - return math.NaN() + return math.NaN(), false } rank := q * observations b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank }) if b == len(buckets)-1 { - return buckets[len(buckets)-2].upperBound + return buckets[len(buckets)-2].upperBound, forcedMonotonic } if b == 0 && buckets[0].upperBound <= 0 { - return buckets[0].upperBound + return buckets[0].upperBound, forcedMonotonic } var ( bucketStart float64 @@ -124,7 +126,7 @@ func bucketQuantile(q float64, buckets buckets) float64 { count -= buckets[b-1].count rank -= buckets[b-1].count } - return bucketStart + (bucketEnd-bucketStart)*(rank/count) + return bucketStart + (bucketEnd-bucketStart)*(rank/count), forcedMonotonic } // histogramQuantile calculates the quantile 'q' based on the given histogram. @@ -370,9 +372,11 @@ func coalesceBuckets(buckets buckets) buckets { // // As a somewhat hacky solution until ingestion is atomic per scrape, we // calculate the "envelope" of the histogram buckets, essentially removing -// any decreases in the count between successive buckets. +// any decreases in the count between successive buckets. We return a bool +// to indicate if this monotonicity was forced or not. -func ensureMonotonic(buckets buckets) { +func ensureMonotonic(buckets buckets) bool { + forced := false max := buckets[0].count for i := 1; i < len(buckets); i++ { switch { @@ -380,8 +384,10 @@ func ensureMonotonic(buckets buckets) { max = buckets[i].count case buckets[i].count < max: buckets[i].count = max + forced = true } } + return forced } // quantile calculates the given quantile of a vector of samples. diff --git a/util/annotations/annotations.go b/util/annotations/annotations.go index e8b59dc7f..bff0100d9 100644 --- a/util/annotations/annotations.go +++ b/util/annotations/annotations.go @@ -100,10 +100,11 @@ var ( PromQLInfo = errors.New("PromQL info") PromQLWarning = errors.New("PromQL warning") - InvalidQuantileWarning = fmt.Errorf("%w: quantile value should be between 0 and 1", PromQLWarning) - BadBucketLabelWarning = fmt.Errorf("%w: bucket label %q is missing or has a malformed value", PromQLWarning, model.BucketLabel) - MixedFloatsHistogramsWarning = fmt.Errorf("%w: encountered a mix of histograms and floats for metric name", PromQLWarning) - MixedClassicNativeHistogramsWarning = fmt.Errorf("%w: vector contains a mix of classic and native histograms for metric name", PromQLWarning) + InvalidQuantileWarning = fmt.Errorf("%w: quantile value should be between 0 and 1", PromQLWarning) + BadBucketLabelWarning = fmt.Errorf("%w: bucket label %q is missing or has a malformed value", PromQLWarning, model.BucketLabel) + MixedFloatsHistogramsWarning = fmt.Errorf("%w: encountered a mix of histograms and floats for metric name", PromQLWarning) + MixedClassicNativeHistogramsWarning = fmt.Errorf("%w: vector contains a mix of classic and native histograms for metric name", PromQLWarning) + HistogramQuantileForcedMonotonicityWarning = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (and may give inaccurate results) for metric name", PromQLWarning) PossibleNonCounterInfo = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count:", PromQLInfo) ) @@ -155,6 +156,15 @@ func NewMixedClassicNativeHistogramsWarning(metricName string, pos posrange.Posi } } +// NewHistogramQuantileForcedMonotonicityWarning is used when the input (classic histograms) to +// histogram_quantile needs to be forced to be monotonic. +func NewHistogramQuantileForcedMonotonicityWarning(metricName string, pos posrange.PositionRange) annoErr { + return annoErr{ + PositionRange: pos, + Err: fmt.Errorf("%w %q", HistogramQuantileForcedMonotonicityWarning, metricName), + } +} + // NewPossibleNonCounterInfo is used when a counter metric does not have the suffixes // _total, _sum or _count. func NewPossibleNonCounterInfo(metricName string, pos posrange.PositionRange) annoErr { From 0cbf0c1c68b7d5590d770db13707e62fac96277c Mon Sep 17 00:00:00 2001 From: Jeanette Tan Date: Fri, 6 Oct 2023 19:09:32 +0800 Subject: [PATCH 2/2] Revise according to code review Signed-off-by: Jeanette Tan --- promql/functions.go | 2 +- promql/quantile.go | 31 ++++++++----------------------- util/annotations/annotations.go | 30 +++++++++++++++--------------- 3 files changed, 24 insertions(+), 39 deletions(-) diff --git a/promql/functions.go b/promql/functions.go index 2a1a0b030..8eb0cad8a 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -1174,7 +1174,7 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev F: res, }) if forcedMonotonicity { - annos.Add(annotations.NewHistogramQuantileForcedMonotonicityWarning(mb.metric.Get(labels.MetricName), args[1].PositionRange())) + annos.Add(annotations.NewHistogramQuantileForcedMonotonicityInfo(mb.metric.Get(labels.MetricName), args[1].PositionRange())) } } } diff --git a/promql/quantile.go b/promql/quantile.go index 1ebc39aa2..f28944868 100644 --- a/promql/quantile.go +++ b/promql/quantile.go @@ -344,37 +344,22 @@ func coalesceBuckets(buckets buckets) buckets { // The assumption that bucket counts increase monotonically with increasing // upperBound may be violated during: // -// * Recording rule evaluation of histogram_quantile, especially when rate() -// has been applied to the underlying bucket timeseries. -// * Evaluation of histogram_quantile computed over federated bucket -// timeseries, especially when rate() has been applied. -// -// This is because scraped data is not made available to rule evaluation or -// federation atomically, so some buckets are computed with data from the -// most recent scrapes, but the other buckets are missing data from the most -// recent scrape. +// - Circumstances where data is already inconsistent at the target's side. +// - Ingestion via the remote write receiver that Prometheus implements. +// - Optimisation of query execution where precision is sacrificed for other +// benefits, not by Prometheus but by systems built on top of it. // // Monotonicity is usually guaranteed because if a bucket with upper bound // u1 has count c1, then any bucket with a higher upper bound u > u1 must -// have counted all c1 observations and perhaps more, so that c >= c1. -// -// Randomly interspersed partial sampling breaks that guarantee, and rate() -// exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from -// 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The -// monotonicity is broken. It is exacerbated by rate() because under normal -// operation, cumulative counting of buckets will cause the bucket counts to -// diverge such that small differences from missing samples are not a problem. -// rate() removes this divergence.) +// have counted all c1 observations and perhaps more, so that c >= c1. // // bucketQuantile depends on that monotonicity to do a binary search for the // bucket with the φ-quantile count, so breaking the monotonicity // guarantee causes bucketQuantile() to return undefined (nonsense) results. // -// As a somewhat hacky solution until ingestion is atomic per scrape, we -// calculate the "envelope" of the histogram buckets, essentially removing -// any decreases in the count between successive buckets. We return a bool -// to indicate if this monotonicity was forced or not. - +// As a somewhat hacky solution, we calculate the "envelope" of the histogram +// buckets, essentially removing any decreases in the count between successive +// buckets. We return a bool to indicate if this monotonicity was forced or not. func ensureMonotonic(buckets buckets) bool { forced := false max := buckets[0].count diff --git a/util/annotations/annotations.go b/util/annotations/annotations.go index bff0100d9..082909411 100644 --- a/util/annotations/annotations.go +++ b/util/annotations/annotations.go @@ -100,13 +100,13 @@ var ( PromQLInfo = errors.New("PromQL info") PromQLWarning = errors.New("PromQL warning") - InvalidQuantileWarning = fmt.Errorf("%w: quantile value should be between 0 and 1", PromQLWarning) - BadBucketLabelWarning = fmt.Errorf("%w: bucket label %q is missing or has a malformed value", PromQLWarning, model.BucketLabel) - MixedFloatsHistogramsWarning = fmt.Errorf("%w: encountered a mix of histograms and floats for metric name", PromQLWarning) - MixedClassicNativeHistogramsWarning = fmt.Errorf("%w: vector contains a mix of classic and native histograms for metric name", PromQLWarning) - HistogramQuantileForcedMonotonicityWarning = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (and may give inaccurate results) for metric name", PromQLWarning) + InvalidQuantileWarning = fmt.Errorf("%w: quantile value should be between 0 and 1", PromQLWarning) + BadBucketLabelWarning = fmt.Errorf("%w: bucket label %q is missing or has a malformed value", PromQLWarning, model.BucketLabel) + MixedFloatsHistogramsWarning = fmt.Errorf("%w: encountered a mix of histograms and floats for metric name", PromQLWarning) + MixedClassicNativeHistogramsWarning = fmt.Errorf("%w: vector contains a mix of classic and native histograms for metric name", PromQLWarning) - PossibleNonCounterInfo = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count:", PromQLInfo) + PossibleNonCounterInfo = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count:", PromQLInfo) + HistogramQuantileForcedMonotonicityInfo = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (and may give inaccurate results) for metric name", PromQLInfo) ) type annoErr struct { @@ -156,15 +156,6 @@ func NewMixedClassicNativeHistogramsWarning(metricName string, pos posrange.Posi } } -// NewHistogramQuantileForcedMonotonicityWarning is used when the input (classic histograms) to -// histogram_quantile needs to be forced to be monotonic. -func NewHistogramQuantileForcedMonotonicityWarning(metricName string, pos posrange.PositionRange) annoErr { - return annoErr{ - PositionRange: pos, - Err: fmt.Errorf("%w %q", HistogramQuantileForcedMonotonicityWarning, metricName), - } -} - // NewPossibleNonCounterInfo is used when a counter metric does not have the suffixes // _total, _sum or _count. func NewPossibleNonCounterInfo(metricName string, pos posrange.PositionRange) annoErr { @@ -173,3 +164,12 @@ func NewPossibleNonCounterInfo(metricName string, pos posrange.PositionRange) an Err: fmt.Errorf("%w %q", PossibleNonCounterInfo, metricName), } } + +// NewHistogramQuantileForcedMonotonicityInfo is used when the input (classic histograms) to +// histogram_quantile needs to be forced to be monotonic. +func NewHistogramQuantileForcedMonotonicityInfo(metricName string, pos posrange.PositionRange) annoErr { + return annoErr{ + PositionRange: pos, + Err: fmt.Errorf("%w %q", HistogramQuantileForcedMonotonicityInfo, metricName), + } +}