From 5a4e4f69366ce123d803675802d72651212e5996 Mon Sep 17 00:00:00 2001 From: Joshua Hesketh Date: Thu, 17 Oct 2024 00:00:46 +1100 Subject: [PATCH] Fix stddev/stdvar when aggregating histograms, NaNs, and infinities (#14941) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit promql: Fix stddev/stdvar when aggregating histograms, NaNs, and Infs Native histograms are ignored when calculating stddev or stdvar. However, for the first series of each group, a `groupedAggregation` is always created. If the first series that was encountered is a histogram then it acts as the equivalent of a 0 point. This change creates the first `groupedAggregation` with the `seen` field set to `false` if the point is a histogram, thus ignoring it like the rest of the aggregation function does. A new `groupedAggregation` will then be created once an actual float value is encountered. This commit also sets the `floatValue` field of the `groupedAggregation` to `NaN`, if the first float value of a group is `NaN` or `±Inf`, so that the outcome is consistently `NaN` once those values are in the mix. (The added tests fail without this change). Signed-off-by: Joshua Hesketh Signed-off-by: beorn7 --------- Signed-off-by: Joshua Hesketh Signed-off-by: beorn7 Co-authored-by: beorn7 --- CHANGELOG.md | 3 + promql/engine.go | 10 +- promql/promqltest/testdata/aggregators.test | 157 ++++++++++++++++++++ 3 files changed, 169 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8e7621186..cc3a68d8b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## unreleased +* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 +* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 + ## 3.0.0-beta.1 / 2024-10-09 * [CHANGE] regexp `.` now matches all characters (performance improvement). #14505 diff --git a/promql/engine.go b/promql/engine.go index 60b4b81384..5435db0fc4 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2926,7 +2926,15 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix group.hasHistogram = true } case parser.STDVAR, parser.STDDEV: - group.floatValue = 0 + switch { + case h != nil: + // Ignore histograms for STDVAR and STDDEV. + group.seen = false + case math.IsNaN(f), math.IsInf(f, 0): + group.floatValue = math.NaN() + default: + group.floatValue = 0 + } case parser.QUANTILE: group.heap = make(vectorByValueHeap, 1) group.heap[0] = Sample{F: f} diff --git a/promql/promqltest/testdata/aggregators.test b/promql/promqltest/testdata/aggregators.test index 3c91883960..e2eb381dbc 100644 --- a/promql/promqltest/testdata/aggregators.test +++ b/promql/promqltest/testdata/aggregators.test @@ -572,3 +572,160 @@ clear # #eval instant at 1m count(topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without())) # {} 1 + +clear + +# Test stddev produces consistent results regardless the order the data is loaded in. +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}} + +eval instant at 0m stddev(series) + {} 0.5 + +eval instant at 0m stdvar(series) + {} 0.25 + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + +clear + +load 5m + series{label="a"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}} + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} 0.5 + +eval instant at 0m stdvar(series) + {} 0.25 + +eval instant at 0m stddev by (label) (series) + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} NaN + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +clear + +load 5m + series{label="a"} NaN + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series NaN + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +clear + +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} inf + +eval instant at 0m stddev (series) + {} NaN + +eval instant at 0m stdvar (series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +clear + +load 5m + series{label="a"} inf + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series inf + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN