promql: Limit extrapolation of delta/rate/increase

The new implementation detects the start and end of a series by looking at the average sample interval within the range. If the first (last) sample in the range is more than 1.1*interval distant from the beginning (end) of the range, it is considered the first (last) sample of the series as a whole, and extrapolation is limited to half the interval (rather than all the way to the beginning (end) of the range). In addition, if the extrapolated starting point of a counter (where it is zero) is within the range, it is used as the starting point of the series. Fixes #581
2025-03-05 20:59:13 -08:00 · 2015-11-28 21:13:41 +00:00 · 2015-11-28 21:13:41 +00:00 · c77c3a8c56
parent 7da42eee6e
commit c77c3a8c56
3 changed files with 97 additions and 48 deletions
--- a/promql/functions.go
+++ b/promql/functions.go
@ -19,7 +19,6 @@ import (
 	"regexp"
 	"sort"
 	"strconv"
 	"time"
 	"github.com/prometheus/common/model"
@ -44,28 +43,25 @@ func funcTime(ev *evaluator, args Expressions) model.Value {
 	}
 }
-// === delta(matrix model.ValMatrix) Vector ===
+// extrapolatedRate is a utility function for rate/increase/delta.
-func funcDelta(ev *evaluator, args Expressions) model.Value {
+// It calculates the rate (allowing for counter resets if isCounter is true),
-	// This function still takes a 2nd argument for use by rate() and increase().
+// extrapolates if the first/last sample is close to the boundary, and returns
-	isCounter := len(args) >= 2 && ev.evalInt(args[1]) > 0
+// the result as either per-second (if isRate is true) or overall.
 func extrapolatedRate(ev *evaluator, arg Expr, isCounter bool, isRate bool) model.Value {
 	ms := arg.(*MatrixSelector)
 	rangeStart := ev.Timestamp.Add(-ms.Range - ms.Offset)
 	rangeEnd := ev.Timestamp.Add(-ms.Offset)
 	resultVector := vector{}
-	// If we treat these metrics as counters, we need to fetch all values
+	matrixValue := ev.evalMatrix(ms)
 	// in the interval to find breaks in the timeseries' monotonicity.
 	// I.e. if a counter resets, we want to ignore that reset.
 	var matrixValue matrix
 	if isCounter {
 		matrixValue = ev.evalMatrix(args[0])
 	} else {
 		matrixValue = ev.evalMatrixBounds(args[0])
 	}
 	for _, samples := range matrixValue {
-		// No sense in trying to compute a delta without at least two points. Drop
+		// No sense in trying to compute a rate without at least two points. Drop
 		// this vector element.
 		if len(samples.Values) < 2 {
 			continue
 		}
 		var (
 			counterCorrection model.SampleValue
 			lastValue         model.SampleValue
@ -79,22 +75,48 @@ func funcDelta(ev *evaluator, args Expressions) model.Value {
 		}
 		resultValue := lastValue - samples.Values[0].Value + counterCorrection
-		targetInterval := args[0].(*MatrixSelector).Range
+		// Duration between first/last samples and boundary of range.
-		sampledInterval := samples.Values[len(samples.Values)-1].Timestamp.Sub(samples.Values[0].Timestamp)
+		durationToStart := samples.Values[0].Timestamp.Sub(rangeStart).Seconds()
-		if sampledInterval == 0 {
+		durationToEnd := rangeEnd.Sub(samples.Values[len(samples.Values)-1].Timestamp).Seconds()
-			// Only found one sample. Cannot compute a rate from this.
+
-			continue
+		sampledInterval := samples.Values[len(samples.Values)-1].Timestamp.Sub(samples.Values[0].Timestamp).Seconds()
 		averageDurationBetweenSamples := sampledInterval / float64(len(samples.Values)-1)
 		if isCounter && resultValue > 0 && samples.Values[0].Value >= 0 {
 			// Counters cannot be negative. If we have any slope at
 			// all (i.e. resultValue went up), we can extrapolate
 			// the zero point of the counter. If the duration to the
 			// zero point is shorter than the durationToStart, we
 			// take the zero point as the start of the series,
 			// thereby avoiding extrapolation to negative counter
 			// values.
 			durationToZero := sampledInterval * float64(samples.Values[0].Value/resultValue)
 			if durationToZero < durationToStart {
 				durationToStart = durationToZero
 			}
 		}
 		// If the first/last samples are close to the boundaries of the range,
 		// extrapolate the result. This is as we expect that another sample
 		// will exist given the spacing between samples we've seen thus far,
 		// with an allowance for noise.
 		extrapolationThreshold := averageDurationBetweenSamples * 1.1
 		extrapolateToInterval := sampledInterval
 		if durationToStart < extrapolationThreshold {
 			extrapolateToInterval += durationToStart
 		} else {
 			extrapolateToInterval += averageDurationBetweenSamples / 2
 		}
 		if durationToEnd < extrapolationThreshold {
 			extrapolateToInterval += durationToEnd
 		} else {
 			extrapolateToInterval += averageDurationBetweenSamples / 2
 		}
 		resultValue = resultValue * model.SampleValue(extrapolateToInterval/sampledInterval)
 		if isRate {
 			resultValue = resultValue / model.SampleValue(ms.Range.Seconds())
 		}
 		// Correct for differences in target vs. actual delta interval.
 		//
 		// Above, we didn't actually calculate the delta for the specified target
 		// interval, but for an interval between the first and last found samples
 		// under the target interval, which will usually have less time between
 		// them. Depending on how many samples are found under a target interval,
 		// the delta results are distorted and temporal aliasing occurs (ugly
 		// bumps). This effect is corrected for below.
 		intervalCorrection := model.SampleValue(targetInterval) / model.SampleValue(sampledInterval)
 		resultValue *= intervalCorrection
 		resultSample := &sample{
 			Metric:    samples.Metric,
@ -107,25 +129,19 @@ func funcDelta(ev *evaluator, args Expressions) model.Value {
 	return resultVector
 }
 // === delta(matrix model.ValMatrix) Vector ===
 func funcDelta(ev *evaluator, args Expressions) model.Value {
 	return extrapolatedRate(ev, args[0], false, false)
 }
 // === rate(node model.ValMatrix) Vector ===
 func funcRate(ev *evaluator, args Expressions) model.Value {
-	args = append(args, &NumberLiteral{1})
+	return extrapolatedRate(ev, args[0], true, true)
 	vector := funcDelta(ev, args).(vector)
 	// TODO: could be other type of model.ValMatrix in the future (right now, only
 	// MatrixSelector exists). Find a better way of getting the duration of a
 	// matrix, such as looking at the samples themselves.
 	interval := args[0].(*MatrixSelector).Range
 	for i := range vector {
 		vector[i].Value /= model.SampleValue(interval / time.Second)
 	}
 	return vector
 }
 // === increase(node model.ValMatrix) Vector ===
 func funcIncrease(ev *evaluator, args Expressions) model.Value {
-	args = append(args, &NumberLiteral{1})
+	return extrapolatedRate(ev, args[0], true, false)
 	return funcDelta(ev, args).(vector)
 }
 // === irate(node model.ValMatrix) Vector ===
--- a/promql/testdata/functions.test
+++ b/promql/testdata/functions.test
@ -63,6 +63,10 @@ eval instant at 50m increase(http_requests[50m])
 	{path="/foo"} 100
 	{path="/bar"}  90
 eval instant at 50m increase(http_requests[100m])
 	{path="/foo"} 100
 	{path="/bar"}  90
 clear
 # Tests for irate().
@ -87,10 +91,10 @@ load 5m
 	http_requests{job="app-server", instance="1", group="canary"}		0+80x10
 # deriv should return the same as rate in simple cases.
-eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[60m])
+eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m])
 	{group="canary", instance="1", job="app-server"} 0.26666666666666666
-eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[60m])
+eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[50m])
 	{group="canary", instance="1", job="app-server"} 0.26666666666666666
 # deriv should return correct result.
--- a/promql/testdata/legacy.test
+++ b/promql/testdata/legacy.test
@ -15,6 +15,14 @@ load 5m
 	testcounter_reset_middle	0+10x4 0+10x5
 	testcounter_reset_end    	0+10x9 0 10
 load 4m
 	testcounter_zero_cutoff{start="0m"}	0+240x10
 	testcounter_zero_cutoff{start="1m"}	60+240x10
 	testcounter_zero_cutoff{start="2m"}	120+240x10
 	testcounter_zero_cutoff{start="3m"}	180+240x10
 	testcounter_zero_cutoff{start="4m"}	240+240x10
 	testcounter_zero_cutoff{start="5m"}	300+240x10
 load 5m
 	label_grouping_test{a="aa", b="bb"}	0+10x10
 	label_grouping_test{a="a", b="abb"}	0+20x10
@ -151,11 +159,12 @@ eval instant at 50m delta(http_requests{group="canary", instance="1", job="app-s
 # Rates should calculate per-second rates.
-eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[60m])
+eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m])
 	{group="canary", instance="1", job="app-server"} 0.26666666666666666
 # Counter resets at in the middle of range are handled correctly by rate().
-eval instant at 50m rate(testcounter_reset_middle[60m])
+eval instant at 50m rate(testcounter_reset_middle[50m])
 	{} 0.03
@ -163,6 +172,26 @@ eval instant at 50m rate(testcounter_reset_middle[60m])
 eval instant at 50m rate(testcounter_reset_end[5m])
 	{} 0
 # Zero cutoff for left-side extrapolation.
 eval instant at 10m rate(testcounter_zero_cutoff[20m])
 	{start="0m"} 0.5
 	{start="1m"} 0.55
 	{start="2m"} 0.6
 	{start="3m"} 0.65
 	{start="4m"} 0.7
 	{start="5m"} 0.6
 # Normal half-interval cutoff for left-side extrapolation.
 eval instant at 50m rate(testcounter_zero_cutoff[20m])
 	{start="0m"} 0.6
 	{start="1m"} 0.6
 	{start="2m"} 0.6
 	{start="3m"} 0.6
 	{start="4m"} 0.6
 	{start="5m"} 0.6
 # count_scalar for a non-empty vector should return scalar element count.
 eval instant at 50m count_scalar(http_requests)
 	8