From 5ee94f49a22c9c9df338f7acc27044fe75f078af Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 7 Aug 2024 15:30:01 +1000 Subject: [PATCH 1/5] Fix issue where `sum` over mixed exponential and custom buckets, or incompatible custom buckets, produces incorrect results Signed-off-by: Charles Korn --- promql/engine.go | 19 +++++++++---- .../testdata/native_histograms.test | 27 +++++++++++++++++++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index 14c3706062..efb6c583fb 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2784,6 +2784,7 @@ type groupedAggregation struct { seen bool // Was this output groups seen in the input at this timestamp. hasFloat bool // Has at least 1 float64 sample aggregated. hasHistogram bool // Has at least 1 histogram sample aggregated. + abandonHistogram bool // If true, group has seen mixed exponential and custom buckets, or incompatible custom buckets. groupAggrComplete bool // Used by LIMITK to short-cut series loop when we've reached K elem on every group. incrementalMean bool // True after reverting to incremental calculation of the mean value. } @@ -2809,10 +2810,11 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix // Initialize this group if it's the first time we've seen it. if !group.seen { *group = groupedAggregation{ - seen: true, - floatValue: f, - floatMean: f, - groupCount: 1, + seen: true, + floatValue: f, + floatMean: f, + abandonHistogram: false, + groupCount: 1, } switch op { case parser.AVG, parser.SUM: @@ -2833,6 +2835,10 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix continue } + if group.abandonHistogram { + continue + } + switch op { case parser.SUM: if h != nil { @@ -2841,6 +2847,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix _, err := group.histogramValue.Add(h) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) + group.abandonHistogram = true } } // Otherwise the aggregation contained floats @@ -2987,7 +2994,9 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix annos.Add(annotations.NewMixedFloatsHistogramsAggWarning(e.Expr.PositionRange())) continue } - if aggr.hasHistogram { + if aggr.abandonHistogram { + continue + } else if aggr.hasHistogram { aggr.histogramValue.Compact(0) } else { aggr.floatValue += aggr.floatKahanC diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 034d73eb51..fc0517f75e 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -762,3 +762,30 @@ eval_warn instant at 30s rate(some_metric[30s]) # Test the case where we have more than two points for rate eval_warn instant at 1m rate(some_metric[1m]) {} {{count:0.03333333333333333 sum:0.03333333333333333 buckets:[0.03333333333333333]}} + +# Test mixing exponential and custom buckets. +load 6m + metric{series="exponential"} {{sum:4 count:3 buckets:[1 2 1]}} _ {{sum:4 count:3 buckets:[1 2 1]}} + metric{series="other-exponential"} {{sum:3 count:2 buckets:[1 1 1]}} _ {{sum:3 count:2 buckets:[1 1 1]}} + metric{series="custom"} _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} + metric{series="other-custom"} _ {{schema:-53 sum:15 count:2 custom_values:[5 10] buckets:[0 2]}} {{schema:-53 sum:15 count:2 custom_values:[5 10] buckets:[0 2]}} + +# T=0: only exponential +# T=6: only custom +# T=12: mixed, should be ignored and emit an warning +eval_warn range from 0 to 12m step 6m sum(metric) + {} {{sum:7 count:5 buckets:[2 3 2]}} {{schema:-53 sum:16 count:3 custom_values:[5 10] buckets:[1 2]}} _ + +clear + +# Test incompatible custom bucket schemas. +load 6m + metric{series="1"} _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} + metric{series="2"} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} _ {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} + metric{series="3"} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} + +# T=0: incompatible, should be ignored and emit a warning +# T=6: compatible +# T=12: incompatible followed by compatible, should be ignored and emit a warning +eval_warn range from 0 to 12m step 6m sum(metric) + {} _ {{schema:-53 sum:2 count:2 custom_values:[5 10] buckets:[2]}} _ From f07b3ae67be5620cef9ff5520ba44d94f1216cb9 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 7 Aug 2024 15:32:35 +1000 Subject: [PATCH 2/5] Fix issue where `avg` over mixed exponential and custom buckets, or incompatible custom buckets, produces incorrect results or panics Signed-off-by: Charles Korn --- promql/engine.go | 6 ++++++ promql/promqltest/testdata/native_histograms.test | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/promql/engine.go b/promql/engine.go index efb6c583fb..d51ed92c56 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2868,10 +2868,14 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix toAdd, err := left.Sub(right) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) + group.abandonHistogram = true + continue } _, err = group.histogramValue.Add(toAdd) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) + group.abandonHistogram = true + continue } } // Otherwise the aggregation contained floats @@ -2968,6 +2972,8 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix continue } switch { + case aggr.abandonHistogram: + continue case aggr.hasHistogram: aggr.histogramValue = aggr.histogramValue.Compact(0) case aggr.incrementalMean: diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index fc0517f75e..62fac87c14 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -776,6 +776,9 @@ load 6m eval_warn range from 0 to 12m step 6m sum(metric) {} {{sum:7 count:5 buckets:[2 3 2]}} {{schema:-53 sum:16 count:3 custom_values:[5 10] buckets:[1 2]}} _ +eval_warn range from 0 to 12m step 6m avg(metric) + {} {{sum:3.5 count:2.5 buckets:[1 1.5 1]}} {{schema:-53 sum:8 count:1.5 custom_values:[5 10] buckets:[0.5 1]}} _ + clear # Test incompatible custom bucket schemas. @@ -789,3 +792,6 @@ load 6m # T=12: incompatible followed by compatible, should be ignored and emit a warning eval_warn range from 0 to 12m step 6m sum(metric) {} _ {{schema:-53 sum:2 count:2 custom_values:[5 10] buckets:[2]}} _ + +eval_warn range from 0 to 12m step 6m avg(metric) + {} _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} _ From 0f4bc87b4fde3b4d9483a62a6b4f8fe3286c84bd Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 7 Aug 2024 15:35:06 +1000 Subject: [PATCH 3/5] Make linter happy Signed-off-by: Charles Korn --- promql/engine.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index d51ed92c56..6f0c64d420 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -3000,11 +3000,12 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix annos.Add(annotations.NewMixedFloatsHistogramsAggWarning(e.Expr.PositionRange())) continue } - if aggr.abandonHistogram { + switch { + case aggr.abandonHistogram: continue - } else if aggr.hasHistogram { + case aggr.hasHistogram: aggr.histogramValue.Compact(0) - } else { + default: aggr.floatValue += aggr.floatKahanC } default: From 82bb35fabb609b9da87c6c15931917486ca8911a Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 9 Aug 2024 13:51:31 +1000 Subject: [PATCH 4/5] Address PR feedback: fix typo and rename variable Signed-off-by: Charles Korn --- promql/engine.go | 34 +++++++++---------- .../testdata/native_histograms.test | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index 6f0c64d420..b20690a6d6 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2781,12 +2781,12 @@ type groupedAggregation struct { heap vectorByValueHeap // All bools together for better packing within the struct. - seen bool // Was this output groups seen in the input at this timestamp. - hasFloat bool // Has at least 1 float64 sample aggregated. - hasHistogram bool // Has at least 1 histogram sample aggregated. - abandonHistogram bool // If true, group has seen mixed exponential and custom buckets, or incompatible custom buckets. - groupAggrComplete bool // Used by LIMITK to short-cut series loop when we've reached K elem on every group. - incrementalMean bool // True after reverting to incremental calculation of the mean value. + seen bool // Was this output groups seen in the input at this timestamp. + hasFloat bool // Has at least 1 float64 sample aggregated. + hasHistogram bool // Has at least 1 histogram sample aggregated. + incompatibleHistograms bool // If true, group has seen mixed exponential and custom buckets, or incompatible custom buckets. + groupAggrComplete bool // Used by LIMITK to short-cut series loop when we've reached K elem on every group. + incrementalMean bool // True after reverting to incremental calculation of the mean value. } // aggregation evaluates sum, avg, count, stdvar, stddev or quantile at one timestep on inputMatrix. @@ -2810,11 +2810,11 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix // Initialize this group if it's the first time we've seen it. if !group.seen { *group = groupedAggregation{ - seen: true, - floatValue: f, - floatMean: f, - abandonHistogram: false, - groupCount: 1, + seen: true, + floatValue: f, + floatMean: f, + incompatibleHistograms: false, + groupCount: 1, } switch op { case parser.AVG, parser.SUM: @@ -2835,7 +2835,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix continue } - if group.abandonHistogram { + if group.incompatibleHistograms { continue } @@ -2847,7 +2847,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix _, err := group.histogramValue.Add(h) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) - group.abandonHistogram = true + group.incompatibleHistograms = true } } // Otherwise the aggregation contained floats @@ -2868,13 +2868,13 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix toAdd, err := left.Sub(right) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) - group.abandonHistogram = true + group.incompatibleHistograms = true continue } _, err = group.histogramValue.Add(toAdd) if err != nil { handleAggregationError(err, e, inputMatrix[si].Metric.Get(model.MetricNameLabel), &annos) - group.abandonHistogram = true + group.incompatibleHistograms = true continue } } @@ -2972,7 +2972,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix continue } switch { - case aggr.abandonHistogram: + case aggr.incompatibleHistograms: continue case aggr.hasHistogram: aggr.histogramValue = aggr.histogramValue.Compact(0) @@ -3001,7 +3001,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix continue } switch { - case aggr.abandonHistogram: + case aggr.incompatibleHistograms: continue case aggr.hasHistogram: aggr.histogramValue.Compact(0) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 62fac87c14..09b02f6419 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -772,7 +772,7 @@ load 6m # T=0: only exponential # T=6: only custom -# T=12: mixed, should be ignored and emit an warning +# T=12: mixed, should be ignored and emit a warning eval_warn range from 0 to 12m step 6m sum(metric) {} {{sum:7 count:5 buckets:[2 3 2]}} {{schema:-53 sum:16 count:3 custom_values:[5 10] buckets:[1 2]}} _ From 5cfdde327c2176da01b7f418d3521e5682231340 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 9 Aug 2024 13:57:37 +1000 Subject: [PATCH 5/5] Address PR feedback: add extra test case Signed-off-by: Charles Korn --- .../testdata/native_histograms.test | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 09b02f6419..bb99afd476 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -795,3 +795,24 @@ eval_warn range from 0 to 12m step 6m sum(metric) eval_warn range from 0 to 12m step 6m avg(metric) {} _ {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} _ + +clear + +load 1m + metric{group="just-floats", series="1"} 2 + metric{group="just-floats", series="2"} 3 + metric{group="just-exponential-histograms", series="1"} {{sum:3 count:4 buckets:[1 2 1]}} + metric{group="just-exponential-histograms", series="2"} {{sum:2 count:3 buckets:[1 1 1]}} + metric{group="just-custom-histograms", series="1"} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} + metric{group="just-custom-histograms", series="2"} {{schema:-53 sum:3 count:4 custom_values:[2] buckets:[7]}} + metric{group="floats-and-histograms", series="1"} 2 + metric{group="floats-and-histograms", series="2"} {{sum:2 count:3 buckets:[1 1 1]}} + metric{group="exponential-and-custom-histograms", series="1"} {{sum:2 count:3 buckets:[1 1 1]}} + metric{group="exponential-and-custom-histograms", series="2"} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} + metric{group="incompatible-custom-histograms", series="1"} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} + metric{group="incompatible-custom-histograms", series="2"} {{schema:-53 sum:1 count:1 custom_values:[2] buckets:[1]}} + +eval_warn instant at 0 sum by (group) (metric) + {group="just-floats"} 5 + {group="just-exponential-histograms"} {{sum:5 count:7 buckets:[2 3 2]}} + {group="just-custom-histograms"} {{schema:-53 sum:4 count:5 custom_values:[2] buckets:[8]}}