mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-24 21:24:05 -08:00
use Welford/Knuth method to compute standard deviation and variance (#4533)
* use Welford/Knuth method to compute standard deviation and variance, avoids float precision issues
* use better method for calculating avg and avg_over_time

Signed-off-by: Dan Cech <dcech@grafana.com>
This commit is contained in:
parent
7d01ead689
commit
9f4cb06a37
|
@ -1464,12 +1464,12 @@ func intersection(ls1, ls2 labels.Labels) labels.Labels {
|
|||
}
|
||||
|
||||
type groupedAggregation struct {
|
||||
labels labels.Labels
|
||||
value float64
|
||||
valuesSquaredSum float64
|
||||
groupCount int
|
||||
heap vectorByValueHeap
|
||||
reverseHeap vectorByReverseValueHeap
|
||||
labels labels.Labels
|
||||
value float64
|
||||
mean float64
|
||||
groupCount int
|
||||
heap vectorByValueHeap
|
||||
reverseHeap vectorByReverseValueHeap
|
||||
}
|
||||
|
||||
// aggregation evaluates an aggregation operation on a Vector.
|
||||
|
@ -1540,17 +1540,19 @@ func (ev *evaluator) aggregation(op ItemType, grouping []string, without bool, p
|
|||
sort.Sort(m)
|
||||
}
|
||||
result[groupingKey] = &groupedAggregation{
|
||||
labels: m,
|
||||
value: s.V,
|
||||
valuesSquaredSum: s.V * s.V,
|
||||
groupCount: 1,
|
||||
labels: m,
|
||||
value: s.V,
|
||||
mean: s.V,
|
||||
groupCount: 1,
|
||||
}
|
||||
inputVecLen := int64(len(vec))
|
||||
resultSize := k
|
||||
if k > inputVecLen {
|
||||
resultSize = inputVecLen
|
||||
}
|
||||
if op == itemTopK || op == itemQuantile {
|
||||
if op == itemStdvar || op == itemStddev {
|
||||
result[groupingKey].value = 0.0
|
||||
} else if op == itemTopK || op == itemQuantile {
|
||||
result[groupingKey].heap = make(vectorByValueHeap, 0, resultSize)
|
||||
heap.Push(&result[groupingKey].heap, &Sample{
|
||||
Point: Point{V: s.V},
|
||||
|
@ -1571,8 +1573,8 @@ func (ev *evaluator) aggregation(op ItemType, grouping []string, without bool, p
|
|||
group.value += s.V
|
||||
|
||||
case itemAvg:
|
||||
group.value += s.V
|
||||
group.groupCount++
|
||||
group.mean += (s.V - group.mean) / float64(group.groupCount)
|
||||
|
||||
case itemMax:
|
||||
if group.value < s.V || math.IsNaN(group.value) {
|
||||
|
@ -1588,9 +1590,10 @@ func (ev *evaluator) aggregation(op ItemType, grouping []string, without bool, p
|
|||
group.groupCount++
|
||||
|
||||
case itemStdvar, itemStddev:
|
||||
group.value += s.V
|
||||
group.valuesSquaredSum += s.V * s.V
|
||||
group.groupCount++
|
||||
delta := s.V - group.mean
|
||||
group.mean += delta / float64(group.groupCount)
|
||||
group.value += delta * (s.V - group.mean)
|
||||
|
||||
case itemTopK:
|
||||
if int64(len(group.heap)) < k || group.heap[0].V < s.V || math.IsNaN(group.heap[0].V) {
|
||||
|
@ -1626,18 +1629,16 @@ func (ev *evaluator) aggregation(op ItemType, grouping []string, without bool, p
|
|||
for _, aggr := range result {
|
||||
switch op {
|
||||
case itemAvg:
|
||||
aggr.value = aggr.value / float64(aggr.groupCount)
|
||||
aggr.value = aggr.mean
|
||||
|
||||
case itemCount, itemCountValues:
|
||||
aggr.value = float64(aggr.groupCount)
|
||||
|
||||
case itemStdvar:
|
||||
avg := aggr.value / float64(aggr.groupCount)
|
||||
aggr.value = aggr.valuesSquaredSum/float64(aggr.groupCount) - avg*avg
|
||||
aggr.value = aggr.value / float64(aggr.groupCount)
|
||||
|
||||
case itemStddev:
|
||||
avg := aggr.value / float64(aggr.groupCount)
|
||||
aggr.value = math.Sqrt(aggr.valuesSquaredSum/float64(aggr.groupCount) - avg*avg)
|
||||
aggr.value = math.Sqrt(aggr.value / float64(aggr.groupCount))
|
||||
|
||||
case itemTopK:
|
||||
// The heap keeps the lowest value on top, so reverse it.
|
||||
|
|
|
@ -371,11 +371,12 @@ func aggrOverTime(vals []Value, enh *EvalNodeHelper, aggrFn func([]Point) float6
|
|||
// === avg_over_time(Matrix ValueTypeMatrix) Vector ===
|
||||
func funcAvgOverTime(vals []Value, args Expressions, enh *EvalNodeHelper) Vector {
|
||||
return aggrOverTime(vals, enh, func(values []Point) float64 {
|
||||
var sum float64
|
||||
var mean, count float64
|
||||
for _, v := range values {
|
||||
sum += v.V
|
||||
count++
|
||||
mean += (v.V - mean) / count
|
||||
}
|
||||
return sum / float64(len(values))
|
||||
return mean
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -444,28 +445,28 @@ func funcQuantileOverTime(vals []Value, args Expressions, enh *EvalNodeHelper) V
|
|||
// === stddev_over_time(Matrix ValueTypeMatrix) Vector ===
|
||||
func funcStddevOverTime(vals []Value, args Expressions, enh *EvalNodeHelper) Vector {
|
||||
return aggrOverTime(vals, enh, func(values []Point) float64 {
|
||||
var sum, squaredSum, count float64
|
||||
var aux, count, mean float64
|
||||
for _, v := range values {
|
||||
sum += v.V
|
||||
squaredSum += v.V * v.V
|
||||
count++
|
||||
delta := v.V - mean
|
||||
mean += delta / count
|
||||
aux += delta * (v.V - mean)
|
||||
}
|
||||
avg := sum / count
|
||||
return math.Sqrt(squaredSum/count - avg*avg)
|
||||
return math.Sqrt(aux / count)
|
||||
})
|
||||
}
|
||||
|
||||
// === stdvar_over_time(Matrix ValueTypeMatrix) Vector ===
|
||||
func funcStdvarOverTime(vals []Value, args Expressions, enh *EvalNodeHelper) Vector {
|
||||
return aggrOverTime(vals, enh, func(values []Point) float64 {
|
||||
var sum, squaredSum, count float64
|
||||
var aux, count, mean float64
|
||||
for _, v := range values {
|
||||
sum += v.V
|
||||
squaredSum += v.V * v.V
|
||||
count++
|
||||
delta := v.V - mean
|
||||
mean += delta / count
|
||||
aux += delta * (v.V - mean)
|
||||
}
|
||||
avg := sum / count
|
||||
return squaredSum/count - avg*avg
|
||||
return aux / count
|
||||
})
|
||||
}
|
||||
|
||||
|
|
13
promql/testdata/aggregators.test
vendored
13
promql/testdata/aggregators.test
vendored
|
@ -90,6 +90,19 @@ eval instant at 50m stdvar by (instance)(http_requests)
|
|||
{instance="0"} 50000
|
||||
{instance="1"} 50000
|
||||
|
||||
# Float precision test for standard deviation and variance
|
||||
clear
|
||||
load 5m
|
||||
http_requests{job="api-server", instance="0", group="production"} 0+1.33x10
|
||||
http_requests{job="api-server", instance="1", group="production"} 0+1.33x10
|
||||
http_requests{job="api-server", instance="0", group="canary"} 0+1.33x10
|
||||
|
||||
eval instant at 50m stddev(http_requests)
|
||||
{} 0.0
|
||||
|
||||
eval instant at 50m stdvar(http_requests)
|
||||
{} 0.0
|
||||
|
||||
|
||||
|
||||
# Regression test for missing separator byte in labelsToGroupingKey.
|
||||
|
|
19
promql/testdata/functions.test
vendored
19
promql/testdata/functions.test
vendored
|
@ -374,6 +374,14 @@ eval instant at 8000s holt_winters(http_requests[1m], 0.01, 0.1)
|
|||
{job="api-server", instance="0", group="canary"} 24000
|
||||
{job="api-server", instance="1", group="canary"} -32000
|
||||
|
||||
# Tests for avg_over_time
|
||||
clear
|
||||
load 10s
|
||||
metric 1 2 3 4 5
|
||||
|
||||
eval instant at 1m avg_over_time(metric[1m])
|
||||
{} 3
|
||||
|
||||
# Tests for stddev_over_time and stdvar_over_time.
|
||||
clear
|
||||
load 10s
|
||||
|
@ -385,6 +393,17 @@ eval instant at 1m stdvar_over_time(metric[1m])
|
|||
eval instant at 1m stddev_over_time(metric[1m])
|
||||
{} 3.249615
|
||||
|
||||
# Tests for stddev_over_time and stdvar_over_time #4927.
|
||||
clear
|
||||
load 10s
|
||||
metric 1.5990505637277868 1.5990505637277868 1.5990505637277868
|
||||
|
||||
eval instant at 1m stdvar_over_time(metric[1m])
|
||||
{} 0
|
||||
|
||||
eval instant at 1m stddev_over_time(metric[1m])
|
||||
{} 0
|
||||
|
||||
# Tests for quantile_over_time
|
||||
clear
|
||||
|
||||
|
|
Loading…
Reference in a new issue