[BUGFIX] PromQL: Fix deriv, predict_linear and double_exponential_smoothing with histograms (#15686)

PromQL: Fix deriv, predict_linear and double_exponential_smoothing with histograms

Signed-off-by: Neeraj Gartia <neerajgartia211002@gmail.com>

---------

Signed-off-by: Neeraj Gartia <neerajgartia211002@gmail.com>
2025-03-05 20:59:13 -08:00 · 2024-12-20 04:20:28 +05:30 · 2024-12-20 04:20:28 +05:30 · 0e99ca3e8c
parent 804ab49cfc
commit 0e99ca3e8c
3 changed files with 75 additions and 10 deletions
--- a/promql/functions.go
+++ b/promql/functions.go
@@ -355,7 +355,7 @@ func calcTrendValue(i int, tf, s0, s1, b float64) float64 {
 // https://en.wikipedia.org/wiki/Exponential_smoothing .
 func funcDoubleExponentialSmoothing(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	samples := vals[0].(Matrix)[0]
-
+	metricName := samples.Metric.Get(labels.MetricName)
 	// The smoothing factor argument.
 	sf := vals[1].(Vector)[0].F

@@ -374,6 +374,10 @@ func funcDoubleExponentialSmoothing(vals []parser.Value, args parser.Expressions

 	// Can't do the smoothing operation with less than two points.
 	if l < 2 {
+		// Annotate mix of float and histogram.
+		if l == 1 && len(samples.Histograms) > 0 {
+			return enh.Out, annotations.New().Add(annotations.NewHistogramIgnoredInMixedRangeInfo(metricName, args[0].PositionRange()))
+		}
 		return enh.Out, nil
 	}

@@ -394,7 +398,9 @@ func funcDoubleExponentialSmoothing(vals []parser.Value, args parser.Expressions

 		s0, s1 = s1, x+y
 	}
-
+	if len(samples.Histograms) > 0 {
+		return append(enh.Out, Sample{F: s1}), annotations.New().Add(annotations.NewHistogramIgnoredInMixedRangeInfo(metricName, args[0].PositionRange()))
+	}
 	return append(enh.Out, Sample{F: s1}), nil
 }

@@ -1110,10 +1116,15 @@ func linearRegression(samples []FPoint, interceptTime int64) (slope, intercept f
 // === deriv(node parser.ValueTypeMatrix) (Vector, Annotations) ===
 func funcDeriv(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	samples := vals[0].(Matrix)[0]
+	metricName := samples.Metric.Get(labels.MetricName)

-	// No sense in trying to compute a derivative without at least two points.
+	// No sense in trying to compute a derivative without at least two float points.
 	// Drop this Vector element.
 	if len(samples.Floats) < 2 {
+		// Annotate mix of float and histogram.
+		if len(samples.Floats) == 1 && len(samples.Histograms) > 0 {
+			return enh.Out, annotations.New().Add(annotations.NewHistogramIgnoredInMixedRangeInfo(metricName, args[0].PositionRange()))
+		}
 		return enh.Out, nil
 	}

@@ -1121,6 +1132,9 @@ func funcDeriv(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper
 	// to avoid floating point accuracy issues, see
 	// https://github.com/prometheus/prometheus/issues/2674
 	slope, _ := linearRegression(samples.Floats, samples.Floats[0].T)
+	if len(samples.Histograms) > 0 {
+		return append(enh.Out, Sample{F: slope}), annotations.New().Add(annotations.NewHistogramIgnoredInMixedRangeInfo(metricName, args[0].PositionRange()))
+	}
 	return append(enh.Out, Sample{F: slope}), nil
 }

@@ -1128,13 +1142,22 @@ func funcDeriv(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper
 func funcPredictLinear(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	samples := vals[0].(Matrix)[0]
 	duration := vals[1].(Vector)[0].F
-	// No sense in trying to predict anything without at least two points.
+	metricName := samples.Metric.Get(labels.MetricName)
+
+	// No sense in trying to predict anything without at least two float points.
 	// Drop this Vector element.
 	if len(samples.Floats) < 2 {
+		// Annotate mix of float and histogram.
+		if len(samples.Floats) == 1 && len(samples.Histograms) > 0 {
+			return enh.Out, annotations.New().Add(annotations.NewHistogramIgnoredInMixedRangeInfo(metricName, args[0].PositionRange()))
+		}
 		return enh.Out, nil
 	}
-	slope, intercept := linearRegression(samples.Floats, enh.Ts)

+	slope, intercept := linearRegression(samples.Floats, enh.Ts)
+	if len(samples.Histograms) > 0 {
+		return append(enh.Out, Sample{F: slope*duration + intercept}), annotations.New().Add(annotations.NewHistogramIgnoredInMixedRangeInfo(metricName, args[0].PositionRange()))
+	}
 	return append(enh.Out, Sample{F: slope*duration + intercept}), nil
 }

--- a/promql/promqltest/testdata/functions.test
+++ b/promql/promqltest/testdata/functions.test
@@ -259,6 +259,7 @@ load 5m
 	testcounter_reset_middle_mix	0+10x4 0+10x5 {{schema:0 sum:1 count:1}} {{schema:1 sum:2 count:2}}
 	http_requests_mix{job="app-server", instance="1", group="canary"}		0+80x10 {{schema:0 sum:1 count:1}}
 	http_requests_histogram{job="app-server", instance="1", group="canary"}		{{schema:0 sum:1 count:2}}x10
+	http_requests_inf{job="app-server", instance="1", group="canary"}	-Inf 0+80x10 Inf

 # deriv should return the same as rate in simple cases.
 eval instant at 50m rate(http_requests_total{group="canary", instance="1", job="app-server"}[50m])
@@ -271,16 +272,20 @@ eval instant at 50m deriv(http_requests_total{group="canary", instance="1", job=
 eval instant at 50m deriv(testcounter_reset_middle_total[100m])
 	{} 0.010606060606060607

-# deriv should ignore histograms.
-eval instant at 110m deriv(http_requests_mix{group="canary", instance="1", job="app-server"}[110m])
+# deriv should ignore histograms with info annotation.
+eval_info instant at 110m deriv(http_requests_mix{group="canary", instance="1", job="app-server"}[110m])
 	{group="canary", instance="1", job="app-server"} 0.26666666666666666

-eval instant at 100m deriv(testcounter_reset_middle_mix[110m])
+eval_info instant at 100m deriv(testcounter_reset_middle_mix[110m])
 	{} 0.010606060606060607

 eval instant at 50m deriv(http_requests_histogram[60m])
 	#empty

+# deriv should return NaN in case of +Inf or -Inf found.
+eval instant at 100m deriv(http_requests_inf[100m])
+	{job="app-server", instance="1", group="canary"} NaN
+
 # predict_linear should return correct result.
 # X/s = [  0, 300, 600, 900,1200,1500,1800,2100,2400,2700,3000]
 # Y   = [  0,  10,  20,  30,  40,   0,  10,  20,  30,  40,  50]
@@ -316,6 +321,20 @@ eval instant at 10m predict_linear(testcounter_reset_middle_total[55m] @ 3000, 3
 eval instant at 70m predict_linear(testcounter_reset_middle_total[55m] @ 3000, 3600)
 	{} 89.54545454545455

+# predict_linear should ignore histogram with info annotation.
+eval_info instant at 60m predict_linear(testcounter_reset_middle_mix[60m], 3000)
+	{} 70
+
+eval_info instant at 60m predict_linear(testcounter_reset_middle_mix[60m], 50m)
+	{} 70
+
+eval instant at 60m predict_linear(http_requests_histogram[60m], 50m)
+	#empty
+
+# predict_linear should return NaN in case of +Inf or -Inf found.
+eval instant at 100m predict_linear(http_requests_inf[100m], 6000)
+	{job="app-server", instance="1", group="canary"} NaN
+
 # With http_requests_total, there is a sample value exactly at the end of
 # the range, and it has exactly the predicted value, so predict_linear
 # can be emulated with deriv.
@@ -719,6 +738,11 @@ load 10s
 	http_requests{job="api-server", instance="1", group="production"}	0+20x1000 200+30x1000
 	http_requests{job="api-server", instance="0", group="canary"}		0+30x1000 300+80x1000
 	http_requests{job="api-server", instance="1", group="canary"}		0+40x2000
+	http_requests_mix{job="api-server", instance="0", group="production"}	0+10x1000 100+30x1000 {{schema:0 count:1 sum:2}}x1000
+	http_requests_mix{job="api-server", instance="1", group="production"}	0+20x1000 200+30x1000 {{schema:0 count:1 sum:2}}x1000
+	http_requests_mix{job="api-server", instance="0", group="canary"}		0+30x1000 300+80x1000 {{schema:0 count:1 sum:2}}x1000
+	http_requests_mix{job="api-server", instance="1", group="canary"}		0+40x2000 {{schema:0 count:1 sum:2}}x1000
+	http_requests_histogram{job="api-server", instance="1", group="canary"}	{{schema:0 count:1 sum:2}}x1000

 eval instant at 8000s double_exponential_smoothing(http_requests[1m], 0.01, 0.1)
 	{job="api-server", instance="0", group="production"} 8000
@@ -726,6 +750,16 @@ eval instant at 8000s double_exponential_smoothing(http_requests[1m], 0.01, 0.1)
 	{job="api-server", instance="0", group="canary"} 24000
 	{job="api-server", instance="1", group="canary"} 32000

+# double_exponential_smoothing should ignore histogram with info annotation.
+eval_info instant at 20010s double_exponential_smoothing(http_requests_mix[1m], 0.01, 0.1)
+	{job="api-server", instance="0", group="production"} 30100
+	{job="api-server", instance="1", group="production"} 30200
+	{job="api-server", instance="0", group="canary"} 80300
+	{job="api-server", instance="1", group="canary"} 80000
+
+eval instant at 10000s double_exponential_smoothing(http_requests_histogram[1m], 0.01, 0.1)
+	#empty
+
 # negative trends
 clear
 load 10s
--- a/util/annotations/annotations.go
+++ b/util/annotations/annotations.go
@@ -148,6 +148,7 @@ var (
 	HistogramQuantileForcedMonotonicityInfo = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile) for metric name", PromQLInfo)
 	IncompatibleTypesInBinOpInfo            = fmt.Errorf("%w: incompatible sample types encountered for binary operator", PromQLInfo)
 	HistogramIgnoredInAggregationInfo       = fmt.Errorf("%w: ignored histogram in", PromQLInfo)
+	HistogramIgnoredInMixedRangeInfo        = fmt.Errorf("%w: ignored histograms in a range containing both floats and histograms for metric name", PromQLInfo)
 )

 type annoErr struct {
@@ -293,3 +294,10 @@ func NewHistogramIgnoredInAggregationInfo(aggregation string, pos posrange.Posit
 		Err:           fmt.Errorf("%w %s aggregation", HistogramIgnoredInAggregationInfo, aggregation),
 	}
 }
+
+func NewHistogramIgnoredInMixedRangeInfo(metricName string, pos posrange.PositionRange) error {
+	return annoErr{
+		PositionRange: pos,
+		Err:           fmt.Errorf("%w %q", HistogramIgnoredInMixedRangeInfo, metricName),
+	}
+}