From c77c3a8c56cf72ffca212bd3d34c87cbf8bc772f Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Sat, 28 Nov 2015 21:13:41 +0000 Subject: [PATCH] promql: Limit extrapolation of delta/rate/increase The new implementation detects the start and end of a series by looking at the average sample interval within the range. If the first (last) sample in the range is more than 1.1*interval distant from the beginning (end) of the range, it is considered the first (last) sample of the series as a whole, and extrapolation is limited to half the interval (rather than all the way to the beginning (end) of the range). In addition, if the extrapolated starting point of a counter (where it is zero) is within the range, it is used as the starting point of the series. Fixes #581 --- promql/functions.go | 104 +++++++++++++++++++-------------- promql/testdata/functions.test | 8 ++- promql/testdata/legacy.test | 33 ++++++++++- 3 files changed, 97 insertions(+), 48 deletions(-) diff --git a/promql/functions.go b/promql/functions.go index ebf44a06cc..09cde741c2 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -19,7 +19,6 @@ import ( "regexp" "sort" "strconv" - "time" "github.com/prometheus/common/model" @@ -44,28 +43,25 @@ func funcTime(ev *evaluator, args Expressions) model.Value { } } -// === delta(matrix model.ValMatrix) Vector === -func funcDelta(ev *evaluator, args Expressions) model.Value { - // This function still takes a 2nd argument for use by rate() and increase(). - isCounter := len(args) >= 2 && ev.evalInt(args[1]) > 0 +// extrapolatedRate is a utility function for rate/increase/delta. +// It calculates the rate (allowing for counter resets if isCounter is true), +// extrapolates if the first/last sample is close to the boundary, and returns +// the result as either per-second (if isRate is true) or overall. +func extrapolatedRate(ev *evaluator, arg Expr, isCounter bool, isRate bool) model.Value { + ms := arg.(*MatrixSelector) + + rangeStart := ev.Timestamp.Add(-ms.Range - ms.Offset) + rangeEnd := ev.Timestamp.Add(-ms.Offset) + resultVector := vector{} - // If we treat these metrics as counters, we need to fetch all values - // in the interval to find breaks in the timeseries' monotonicity. - // I.e. if a counter resets, we want to ignore that reset. - var matrixValue matrix - if isCounter { - matrixValue = ev.evalMatrix(args[0]) - } else { - matrixValue = ev.evalMatrixBounds(args[0]) - } + matrixValue := ev.evalMatrix(ms) for _, samples := range matrixValue { - // No sense in trying to compute a delta without at least two points. Drop + // No sense in trying to compute a rate without at least two points. Drop // this vector element. if len(samples.Values) < 2 { continue } - var ( counterCorrection model.SampleValue lastValue model.SampleValue @@ -79,22 +75,48 @@ func funcDelta(ev *evaluator, args Expressions) model.Value { } resultValue := lastValue - samples.Values[0].Value + counterCorrection - targetInterval := args[0].(*MatrixSelector).Range - sampledInterval := samples.Values[len(samples.Values)-1].Timestamp.Sub(samples.Values[0].Timestamp) - if sampledInterval == 0 { - // Only found one sample. Cannot compute a rate from this. - continue + // Duration between first/last samples and boundary of range. + durationToStart := samples.Values[0].Timestamp.Sub(rangeStart).Seconds() + durationToEnd := rangeEnd.Sub(samples.Values[len(samples.Values)-1].Timestamp).Seconds() + + sampledInterval := samples.Values[len(samples.Values)-1].Timestamp.Sub(samples.Values[0].Timestamp).Seconds() + averageDurationBetweenSamples := sampledInterval / float64(len(samples.Values)-1) + + if isCounter && resultValue > 0 && samples.Values[0].Value >= 0 { + // Counters cannot be negative. If we have any slope at + // all (i.e. resultValue went up), we can extrapolate + // the zero point of the counter. If the duration to the + // zero point is shorter than the durationToStart, we + // take the zero point as the start of the series, + // thereby avoiding extrapolation to negative counter + // values. + durationToZero := sampledInterval * float64(samples.Values[0].Value/resultValue) + if durationToZero < durationToStart { + durationToStart = durationToZero + } + } + + // If the first/last samples are close to the boundaries of the range, + // extrapolate the result. This is as we expect that another sample + // will exist given the spacing between samples we've seen thus far, + // with an allowance for noise. + extrapolationThreshold := averageDurationBetweenSamples * 1.1 + extrapolateToInterval := sampledInterval + + if durationToStart < extrapolationThreshold { + extrapolateToInterval += durationToStart + } else { + extrapolateToInterval += averageDurationBetweenSamples / 2 + } + if durationToEnd < extrapolationThreshold { + extrapolateToInterval += durationToEnd + } else { + extrapolateToInterval += averageDurationBetweenSamples / 2 + } + resultValue = resultValue * model.SampleValue(extrapolateToInterval/sampledInterval) + if isRate { + resultValue = resultValue / model.SampleValue(ms.Range.Seconds()) } - // Correct for differences in target vs. actual delta interval. - // - // Above, we didn't actually calculate the delta for the specified target - // interval, but for an interval between the first and last found samples - // under the target interval, which will usually have less time between - // them. Depending on how many samples are found under a target interval, - // the delta results are distorted and temporal aliasing occurs (ugly - // bumps). This effect is corrected for below. - intervalCorrection := model.SampleValue(targetInterval) / model.SampleValue(sampledInterval) - resultValue *= intervalCorrection resultSample := &sample{ Metric: samples.Metric, @@ -107,25 +129,19 @@ func funcDelta(ev *evaluator, args Expressions) model.Value { return resultVector } +// === delta(matrix model.ValMatrix) Vector === +func funcDelta(ev *evaluator, args Expressions) model.Value { + return extrapolatedRate(ev, args[0], false, false) +} + // === rate(node model.ValMatrix) Vector === func funcRate(ev *evaluator, args Expressions) model.Value { - args = append(args, &NumberLiteral{1}) - vector := funcDelta(ev, args).(vector) - - // TODO: could be other type of model.ValMatrix in the future (right now, only - // MatrixSelector exists). Find a better way of getting the duration of a - // matrix, such as looking at the samples themselves. - interval := args[0].(*MatrixSelector).Range - for i := range vector { - vector[i].Value /= model.SampleValue(interval / time.Second) - } - return vector + return extrapolatedRate(ev, args[0], true, true) } // === increase(node model.ValMatrix) Vector === func funcIncrease(ev *evaluator, args Expressions) model.Value { - args = append(args, &NumberLiteral{1}) - return funcDelta(ev, args).(vector) + return extrapolatedRate(ev, args[0], true, false) } // === irate(node model.ValMatrix) Vector === diff --git a/promql/testdata/functions.test b/promql/testdata/functions.test index d2bf59e024..bccec5a1a0 100644 --- a/promql/testdata/functions.test +++ b/promql/testdata/functions.test @@ -63,6 +63,10 @@ eval instant at 50m increase(http_requests[50m]) {path="/foo"} 100 {path="/bar"} 90 +eval instant at 50m increase(http_requests[100m]) + {path="/foo"} 100 + {path="/bar"} 90 + clear # Tests for irate(). @@ -87,10 +91,10 @@ load 5m http_requests{job="app-server", instance="1", group="canary"} 0+80x10 # deriv should return the same as rate in simple cases. -eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[60m]) +eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m]) {group="canary", instance="1", job="app-server"} 0.26666666666666666 -eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[60m]) +eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[50m]) {group="canary", instance="1", job="app-server"} 0.26666666666666666 # deriv should return correct result. diff --git a/promql/testdata/legacy.test b/promql/testdata/legacy.test index 6e47e32257..d80293f0f8 100644 --- a/promql/testdata/legacy.test +++ b/promql/testdata/legacy.test @@ -15,6 +15,14 @@ load 5m testcounter_reset_middle 0+10x4 0+10x5 testcounter_reset_end 0+10x9 0 10 +load 4m + testcounter_zero_cutoff{start="0m"} 0+240x10 + testcounter_zero_cutoff{start="1m"} 60+240x10 + testcounter_zero_cutoff{start="2m"} 120+240x10 + testcounter_zero_cutoff{start="3m"} 180+240x10 + testcounter_zero_cutoff{start="4m"} 240+240x10 + testcounter_zero_cutoff{start="5m"} 300+240x10 + load 5m label_grouping_test{a="aa", b="bb"} 0+10x10 label_grouping_test{a="a", b="abb"} 0+20x10 @@ -151,11 +159,12 @@ eval instant at 50m delta(http_requests{group="canary", instance="1", job="app-s # Rates should calculate per-second rates. -eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[60m]) +eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m]) {group="canary", instance="1", job="app-server"} 0.26666666666666666 + # Counter resets at in the middle of range are handled correctly by rate(). -eval instant at 50m rate(testcounter_reset_middle[60m]) +eval instant at 50m rate(testcounter_reset_middle[50m]) {} 0.03 @@ -163,6 +172,26 @@ eval instant at 50m rate(testcounter_reset_middle[60m]) eval instant at 50m rate(testcounter_reset_end[5m]) {} 0 + +# Zero cutoff for left-side extrapolation. +eval instant at 10m rate(testcounter_zero_cutoff[20m]) + {start="0m"} 0.5 + {start="1m"} 0.55 + {start="2m"} 0.6 + {start="3m"} 0.65 + {start="4m"} 0.7 + {start="5m"} 0.6 + +# Normal half-interval cutoff for left-side extrapolation. +eval instant at 50m rate(testcounter_zero_cutoff[20m]) + {start="0m"} 0.6 + {start="1m"} 0.6 + {start="2m"} 0.6 + {start="3m"} 0.6 + {start="4m"} 0.6 + {start="5m"} 0.6 + + # count_scalar for a non-empty vector should return scalar element count. eval instant at 50m count_scalar(http_requests) 8