promql: Limit extrapolation of delta/rate/increase

The new implementation detects the start and end of a series by
looking at the average sample interval within the range. If the first
(last) sample in the range is at least 1.1 times that average interval
away from the beginning (end) of the range, it is considered the first
(last) sample of the series as a whole, and extrapolation is limited to
half the average interval instead of reaching all the way to the
beginning (end) of the range. In addition, if the extrapolated starting
point of a counter (where it is zero) lies within the range, it is used
as the starting point of the series.

Fixes #581
This commit is contained in:
Brian Brazil 2015-11-28 21:13:41 +00:00 committed by beorn7
parent 7da42eee6e
commit c77c3a8c56
3 changed files with 97 additions and 48 deletions

View file

@ -19,7 +19,6 @@ import (
"regexp" "regexp"
"sort" "sort"
"strconv" "strconv"
"time"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
@ -44,28 +43,25 @@ func funcTime(ev *evaluator, args Expressions) model.Value {
} }
} }
// === delta(matrix model.ValMatrix) Vector === // extrapolatedRate is a utility function for rate/increase/delta.
func funcDelta(ev *evaluator, args Expressions) model.Value { // It calculates the rate (allowing for counter resets if isCounter is true),
// This function still takes a 2nd argument for use by rate() and increase(). // extrapolates if the first/last sample is close to the boundary, and returns
isCounter := len(args) >= 2 && ev.evalInt(args[1]) > 0 // the result as either per-second (if isRate is true) or overall.
func extrapolatedRate(ev *evaluator, arg Expr, isCounter bool, isRate bool) model.Value {
ms := arg.(*MatrixSelector)
rangeStart := ev.Timestamp.Add(-ms.Range - ms.Offset)
rangeEnd := ev.Timestamp.Add(-ms.Offset)
resultVector := vector{} resultVector := vector{}
// If we treat these metrics as counters, we need to fetch all values matrixValue := ev.evalMatrix(ms)
// in the interval to find breaks in the timeseries' monotonicity.
// I.e. if a counter resets, we want to ignore that reset.
var matrixValue matrix
if isCounter {
matrixValue = ev.evalMatrix(args[0])
} else {
matrixValue = ev.evalMatrixBounds(args[0])
}
for _, samples := range matrixValue { for _, samples := range matrixValue {
// No sense in trying to compute a delta without at least two points. Drop // No sense in trying to compute a rate without at least two points. Drop
// this vector element. // this vector element.
if len(samples.Values) < 2 { if len(samples.Values) < 2 {
continue continue
} }
var ( var (
counterCorrection model.SampleValue counterCorrection model.SampleValue
lastValue model.SampleValue lastValue model.SampleValue
@ -79,22 +75,48 @@ func funcDelta(ev *evaluator, args Expressions) model.Value {
} }
resultValue := lastValue - samples.Values[0].Value + counterCorrection resultValue := lastValue - samples.Values[0].Value + counterCorrection
targetInterval := args[0].(*MatrixSelector).Range // Duration between first/last samples and boundary of range.
sampledInterval := samples.Values[len(samples.Values)-1].Timestamp.Sub(samples.Values[0].Timestamp) durationToStart := samples.Values[0].Timestamp.Sub(rangeStart).Seconds()
if sampledInterval == 0 { durationToEnd := rangeEnd.Sub(samples.Values[len(samples.Values)-1].Timestamp).Seconds()
// Only found one sample. Cannot compute a rate from this.
continue sampledInterval := samples.Values[len(samples.Values)-1].Timestamp.Sub(samples.Values[0].Timestamp).Seconds()
averageDurationBetweenSamples := sampledInterval / float64(len(samples.Values)-1)
if isCounter && resultValue > 0 && samples.Values[0].Value >= 0 {
// Counters cannot be negative. If we have any slope at
// all (i.e. resultValue went up), we can extrapolate
// the zero point of the counter. If the duration to the
// zero point is shorter than the durationToStart, we
// take the zero point as the start of the series,
// thereby avoiding extrapolation to negative counter
// values.
durationToZero := sampledInterval * float64(samples.Values[0].Value/resultValue)
if durationToZero < durationToStart {
durationToStart = durationToZero
}
}
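// If the first/last samples are close to the boundaries of the range,
// extrapolate the result all the way to the boundary. We do this because,
// given the spacing between the samples seen thus far (with an allowance
// for noise), we expect that another sample exists just outside the range.
// Otherwise, extrapolate by only half the average sample spacing.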
extrapolationThreshold := averageDurationBetweenSamples * 1.1
extrapolateToInterval := sampledInterval
if durationToStart < extrapolationThreshold {
extrapolateToInterval += durationToStart
} else {
extrapolateToInterval += averageDurationBetweenSamples / 2
}
if durationToEnd < extrapolationThreshold {
extrapolateToInterval += durationToEnd
} else {
extrapolateToInterval += averageDurationBetweenSamples / 2
}
resultValue = resultValue * model.SampleValue(extrapolateToInterval/sampledInterval)
if isRate {
resultValue = resultValue / model.SampleValue(ms.Range.Seconds())
} }
// Correct for differences in target vs. actual delta interval.
//
// Above, we didn't actually calculate the delta for the specified target
// interval, but for an interval between the first and last found samples
// under the target interval, which will usually have less time between
// them. Depending on how many samples are found under a target interval,
// the delta results are distorted and temporal aliasing occurs (ugly
// bumps). This effect is corrected for below.
intervalCorrection := model.SampleValue(targetInterval) / model.SampleValue(sampledInterval)
resultValue *= intervalCorrection
resultSample := &sample{ resultSample := &sample{
Metric: samples.Metric, Metric: samples.Metric,
@ -107,25 +129,19 @@ func funcDelta(ev *evaluator, args Expressions) model.Value {
return resultVector return resultVector
} }
// === delta(matrix model.ValMatrix) Vector ===
// funcDelta implements delta() by delegating to extrapolatedRate with both
// isCounter and isRate set to false: no counter-reset handling is applied and
// the result is the overall change rather than a per-second rate.
func funcDelta(ev *evaluator, args Expressions) model.Value {
// Only the first argument (the matrix expression) is used.
return extrapolatedRate(ev, args[0], false, false)
}
// === rate(node model.ValMatrix) Vector === // === rate(node model.ValMatrix) Vector ===
func funcRate(ev *evaluator, args Expressions) model.Value { func funcRate(ev *evaluator, args Expressions) model.Value {
args = append(args, &NumberLiteral{1}) return extrapolatedRate(ev, args[0], true, true)
vector := funcDelta(ev, args).(vector)
// TODO: could be other type of model.ValMatrix in the future (right now, only
// MatrixSelector exists). Find a better way of getting the duration of a
// matrix, such as looking at the samples themselves.
interval := args[0].(*MatrixSelector).Range
for i := range vector {
vector[i].Value /= model.SampleValue(interval / time.Second)
}
return vector
} }
// === increase(node model.ValMatrix) Vector === // === increase(node model.ValMatrix) Vector ===
func funcIncrease(ev *evaluator, args Expressions) model.Value { func funcIncrease(ev *evaluator, args Expressions) model.Value {
args = append(args, &NumberLiteral{1}) return extrapolatedRate(ev, args[0], true, false)
return funcDelta(ev, args).(vector)
} }
// === irate(node model.ValMatrix) Vector === // === irate(node model.ValMatrix) Vector ===

View file

@ -63,6 +63,10 @@ eval instant at 50m increase(http_requests[50m])
{path="/foo"} 100 {path="/foo"} 100
{path="/bar"} 90 {path="/bar"} 90
eval instant at 50m increase(http_requests[100m])
{path="/foo"} 100
{path="/bar"} 90
clear clear
# Tests for irate(). # Tests for irate().
@ -87,10 +91,10 @@ load 5m
http_requests{job="app-server", instance="1", group="canary"} 0+80x10 http_requests{job="app-server", instance="1", group="canary"} 0+80x10
# deriv should return the same as rate in simple cases. # deriv should return the same as rate in simple cases.
eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[60m]) eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m])
{group="canary", instance="1", job="app-server"} 0.26666666666666666 {group="canary", instance="1", job="app-server"} 0.26666666666666666
eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[60m]) eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[50m])
{group="canary", instance="1", job="app-server"} 0.26666666666666666 {group="canary", instance="1", job="app-server"} 0.26666666666666666
# deriv should return correct result. # deriv should return correct result.

View file

@ -15,6 +15,14 @@ load 5m
testcounter_reset_middle 0+10x4 0+10x5 testcounter_reset_middle 0+10x4 0+10x5
testcounter_reset_end 0+10x9 0 10 testcounter_reset_end 0+10x9 0 10
load 4m
testcounter_zero_cutoff{start="0m"} 0+240x10
testcounter_zero_cutoff{start="1m"} 60+240x10
testcounter_zero_cutoff{start="2m"} 120+240x10
testcounter_zero_cutoff{start="3m"} 180+240x10
testcounter_zero_cutoff{start="4m"} 240+240x10
testcounter_zero_cutoff{start="5m"} 300+240x10
load 5m load 5m
label_grouping_test{a="aa", b="bb"} 0+10x10 label_grouping_test{a="aa", b="bb"} 0+10x10
label_grouping_test{a="a", b="abb"} 0+20x10 label_grouping_test{a="a", b="abb"} 0+20x10
@ -151,11 +159,12 @@ eval instant at 50m delta(http_requests{group="canary", instance="1", job="app-s
# Rates should calculate per-second rates. # Rates should calculate per-second rates.
eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[60m]) eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m])
{group="canary", instance="1", job="app-server"} 0.26666666666666666 {group="canary", instance="1", job="app-server"} 0.26666666666666666
# Counter resets in the middle of the range are handled correctly by rate(). # Counter resets in the middle of the range are handled correctly by rate().
eval instant at 50m rate(testcounter_reset_middle[60m]) eval instant at 50m rate(testcounter_reset_middle[50m])
{} 0.03 {} 0.03
@ -163,6 +172,26 @@ eval instant at 50m rate(testcounter_reset_middle[60m])
eval instant at 50m rate(testcounter_reset_end[5m]) eval instant at 50m rate(testcounter_reset_end[5m])
{} 0 {} 0
# Zero cutoff for left-side extrapolation.
eval instant at 10m rate(testcounter_zero_cutoff[20m])
{start="0m"} 0.5
{start="1m"} 0.55
{start="2m"} 0.6
{start="3m"} 0.65
{start="4m"} 0.7
{start="5m"} 0.6
# Normal half-interval cutoff for left-side extrapolation.
eval instant at 50m rate(testcounter_zero_cutoff[20m])
{start="0m"} 0.6
{start="1m"} 0.6
{start="2m"} 0.6
{start="3m"} 0.6
{start="4m"} 0.6
{start="5m"} 0.6
# count_scalar for a non-empty vector should return scalar element count. # count_scalar for a non-empty vector should return scalar element count.
eval instant at 50m count_scalar(http_requests) eval instant at 50m count_scalar(http_requests)
8 8