[ENHANCEMENT] PromQL: use Kahan summation for sum()

Kahan summation can give a more precise result by keeping a separate
running compensation value that accumulates the small rounding errors
lost in each floating-point addition.

See https://en.wikipedia.org/wiki/Kahan_summation_algorithm

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
Bryan Boreham 2024-05-09 14:29:34 +01:00
parent 3fd24d1cd7
commit ea82b49c33
2 changed files with 21 additions and 4 deletions

View file

@ -2730,7 +2730,7 @@ type groupedAggregation struct {
hasHistogram bool // Has at least 1 histogram sample aggregated. hasHistogram bool // Has at least 1 histogram sample aggregated.
floatValue float64 floatValue float64
histogramValue *histogram.FloatHistogram histogramValue *histogram.FloatHistogram
floatMean float64 floatMean float64 // Mean, or "compensating value" for Kahan summation.
groupCount int groupCount int
heap vectorByValueHeap heap vectorByValueHeap
} }
@ -2758,11 +2758,13 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
*group = groupedAggregation{ *group = groupedAggregation{
seen: true, seen: true,
floatValue: f, floatValue: f,
floatMean: f,
groupCount: 1, groupCount: 1,
} }
switch op { switch op {
case parser.SUM, parser.AVG: case parser.AVG:
group.floatMean = f
fallthrough
case parser.SUM:
if h == nil { if h == nil {
group.hasFloat = true group.hasFloat = true
} else { } else {
@ -2770,6 +2772,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
group.hasHistogram = true group.hasHistogram = true
} }
case parser.STDVAR, parser.STDDEV: case parser.STDVAR, parser.STDDEV:
group.floatMean = f
group.floatValue = 0 group.floatValue = 0
case parser.QUANTILE: case parser.QUANTILE:
group.heap = make(vectorByValueHeap, 1) group.heap = make(vectorByValueHeap, 1)
@ -2792,7 +2795,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
// point in copying the histogram in that case. // point in copying the histogram in that case.
} else { } else {
group.hasFloat = true group.hasFloat = true
group.floatValue += f group.floatValue, group.floatMean = kahanSumInc(f, group.floatValue, group.floatMean)
} }
case parser.AVG: case parser.AVG:
@ -2903,6 +2906,8 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
} }
if aggr.hasHistogram { if aggr.hasHistogram {
aggr.histogramValue.Compact(0) aggr.histogramValue.Compact(0)
} else {
aggr.floatValue += aggr.floatMean // Add Kahan summation compensating term.
} }
default: default:
// For other aggregations, we already have the right value. // For other aggregations, we already have the right value.

View file

@ -503,6 +503,18 @@ eval instant at 1m avg(data{test="-big"})
eval instant at 1m avg(data{test="bigzero"}) eval instant at 1m avg(data{test="bigzero"})
{} 0 {} 0
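# Test summing extreme values: 1e+100 and -1e100 cancel, so the expected sum is 2 + 8 = 10. Naive floating-point summation can lose the small terms.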
clear
load 10s
data{test="ten",point="a"} 2
data{test="ten",point="b"} 8
data{test="ten",point="c"} 1e+100
data{test="ten",point="d"} -1e100
eval instant at 1m sum(data{test="ten"})
{} 10
clear clear
# Test that aggregations are deterministic. # Test that aggregations are deterministic.