promql: Fix subqueries to be really left-open

Previously, we managed to get rid of the sample on the left bound later, so the problem didn't show up in the framework tests. But the subqueries were still evaluated with the sample on the left bound, taking space and showing up if returning the subquery result directly (without further processing through PromQL like in all the framework tests). Signed-off-by: beorn7 <beorn@grafana.com>
2025-03-05 20:59:13 -08:00 · 2024-11-20 18:18:39 +01:00 · 2024-11-20 18:18:39 +01:00 · 4b573e0521
parent c5d009d57f
commit 4b573e0521
2 changed files with 149 additions and 16 deletions
--- a/promql/engine.go
+++ b/promql/engine.go
@ -1524,7 +1524,7 @@ func (ev *evaluator) evalSubquery(ctx context.Context, subq *parser.SubqueryExpr
 	// Avoid double counting samples when running a subquery, those samples will be counted in later stage.
 	ev.samplesStats = ev.samplesStats.NewChild()
 	val, ws := ev.eval(ctx, subq)
-	// But do incorporate the peak from the subquery
+	// But do incorporate the peak from the subquery.
 	samplesStats.UpdatePeakFromSubquery(ev.samplesStats)
 	ev.samplesStats = samplesStats
 	mat := val.(Matrix)
@ -1989,7 +1989,7 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value,
 		// Start with the first timestamp after (ev.startTimestamp - offset - range)
 		// that is aligned with the step (multiple of 'newEv.interval').
 		newEv.startTimestamp = newEv.interval * ((ev.startTimestamp - offsetMillis - rangeMillis) / newEv.interval)
-		if newEv.startTimestamp < (ev.startTimestamp - offsetMillis - rangeMillis) {
+		if newEv.startTimestamp <= (ev.startTimestamp - offsetMillis - rangeMillis) {
 			newEv.startTimestamp += newEv.interval
 		}

--- a/promql/engine_test.go
+++ b/promql/engine_test.go
@ -1428,23 +1428,23 @@ load 10s
 		},
 		{
 			// The peak samples in memory is during the first evaluation:
-			//   - Subquery takes 22 samples, 11 for each bigmetric, but samples on the left bound won't be evaluated.
+			//   - Subquery takes 20 samples, 10 for each bigmetric.
 			//   - Result is calculated per series where the series samples is buffered, hence 10 more here.
 			//   - The result of two series is added before the last series buffer is discarded, so 2 more here.
-			//   Hence at peak it is 22 (subquery) + 10 (buffer of a series) + 2 (result from 2 series).
+			//   Hence at peak it is 20 (subquery) + 10 (buffer of a series) + 2 (result from 2 series).
 			// The subquery samples and the buffer is discarded before duplicating.
 			Query:      `rate(bigmetric[10s:1s] @ 10)`,
-			MaxSamples: 34,
+			MaxSamples: 32,
 			Start:      time.Unix(0, 0),
 			End:        time.Unix(10, 0),
 			Interval:   5 * time.Second,
 		},
 		{
 			// Here the reasoning is same as above. But LHS and RHS are done one after another.
-			// So while one of them takes 34 samples at peak, we need to hold the 2 sample
+			// So while one of them takes 32 samples at peak, we need to hold the 2 sample
 			// result of the other till then.
 			Query:      `rate(bigmetric[10s:1s] @ 10) + rate(bigmetric[10s:1s] @ 30)`,
-			MaxSamples: 36,
+			MaxSamples: 34,
 			Start:      time.Unix(0, 0),
 			End:        time.Unix(10, 0),
 			Interval:   5 * time.Second,
@ -1452,28 +1452,28 @@ load 10s
 		{
 			// promql.Sample as above but with only 1 part as step invariant.
 			// Here the peak is caused by the non-step invariant part as it touches more time range.
-			// Hence at peak it is 2*21 (subquery from 0s to 20s)
+			// Hence at peak it is 2*20 (subquery from 0s to 20s)
 			//                     + 10 (buffer of a series per evaluation)
 			//                     + 6 (result from 2 series at 3 eval times).
 			Query:      `rate(bigmetric[10s:1s]) + rate(bigmetric[10s:1s] @ 30)`,
-			MaxSamples: 58,
+			MaxSamples: 56,
 			Start:      time.Unix(10, 0),
 			End:        time.Unix(20, 0),
 			Interval:   5 * time.Second,
 		},
 		{
 			// Nested subquery.
-			// We saw that innermost rate takes 34 samples which is still the peak
+			// We saw that innermost rate takes 32 samples which is still the peak
 			// since the other two subqueries just duplicate the result.
-			Query:      `rate(rate(bigmetric[10s:1s] @ 10)[100s:25s] @ 1000)[100s:20s] @ 2000`,
-			MaxSamples: 34,
+			Query:      `rate(rate(bigmetric[10:1s] @ 10)[100s:25s] @ 1000)[100s:20s] @ 2000`,
+			MaxSamples: 32,
 			Start:      time.Unix(10, 0),
 		},
 		{
 			// Nested subquery.
 			// Now the outermost subquery produces more samples than innermost rate.
 			Query:      `rate(rate(bigmetric[10s:1s] @ 10)[100s:25s] @ 1000)[17s:1s] @ 2000`,
-			MaxSamples: 36,
+			MaxSamples: 34,
 			Start:      time.Unix(10, 0),
 		},
 	}
@ -1618,6 +1618,19 @@ load 1ms
 		}, {
 			query: "metric[100s:25s] @ 300",
 			start: 100,
+			result: promql.Matrix{
+				promql.Series{
+					Floats: []promql.FPoint{{F: 22, T: 225000}, {F: 25, T: 250000}, {F: 27, T: 275000}, {F: 30, T: 300000}},
+					Metric: lbls1,
+				},
+				promql.Series{
+					Floats: []promql.FPoint{{F: 44, T: 225000}, {F: 50, T: 250000}, {F: 54, T: 275000}, {F: 60, T: 300000}},
+					Metric: lbls2,
+				},
+			},
+		}, {
+			query: "metric[100s1ms:25s] @ 300", // Add 1ms to the range to see the legacy behavior of the previous test.
+			start: 100,
 			result: promql.Matrix{
 				promql.Series{
 					Floats: []promql.FPoint{{F: 20, T: 200000}, {F: 22, T: 225000}, {F: 25, T: 250000}, {F: 27, T: 275000}, {F: 30, T: 300000}},
@ -1631,6 +1644,15 @@ load 1ms
 		}, {
 			query: "metric_neg[50s:25s] @ 0",
 			start: 100,
+			result: promql.Matrix{
+				promql.Series{
+					Floats: []promql.FPoint{{F: 26, T: -25000}, {F: 1, T: 0}},
+					Metric: lblsneg,
+				},
+			},
+		}, {
+			query: "metric_neg[50s1ms:25s] @ 0", // Add 1ms to the range to see the legacy behavior of the previous test.
+			start: 100,
 			result: promql.Matrix{
 				promql.Series{
 					Floats: []promql.FPoint{{F: 51, T: -50000}, {F: 26, T: -25000}, {F: 1, T: 0}},
@ -1640,6 +1662,15 @@ load 1ms
 		}, {
 			query: "metric_neg[50s:25s] @ -100",
 			start: 100,
+			result: promql.Matrix{
+				promql.Series{
+					Floats: []promql.FPoint{{F: 126, T: -125000}, {F: 101, T: -100000}},
+					Metric: lblsneg,
+				},
+			},
+		}, {
+			query: "metric_neg[50s1ms:25s] @ -100", // Add 1ms to the range to see the legacy behavior of the previous test.
+			start: 100,
 			result: promql.Matrix{
 				promql.Series{
 					Floats: []promql.FPoint{{F: 151, T: -150000}, {F: 126, T: -125000}, {F: 101, T: -100000}},
@ -1647,7 +1678,7 @@ load 1ms
 				},
 			},
 		}, {
-			query: `metric_ms[100ms:25ms] @ 2.345`,
+			query: `metric_ms[101ms:25ms] @ 2.345`,
 			start: 100,
 			result: promql.Matrix{
 				promql.Series{
@ -1832,7 +1863,7 @@ func TestSubquerySelector(t *testing.T) {
 						nil,
 						promql.Matrix{
 							promql.Series{
-								Floats: []promql.FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}},
+								Floats: []promql.FPoint{{F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}},
 								Metric: labels.FromStrings("__name__", "metric"),
 							},
 						},
@ -1879,6 +1910,20 @@ func TestSubquerySelector(t *testing.T) {
 			cases: []caseType{
 				{ // Normal selector.
 					Query: `http_requests{group=~"pro.*",instance="0"}[30s:10s]`,
+					Result: promql.Result{
+						nil,
+						promql.Matrix{
+							promql.Series{
+								Floats: []promql.FPoint{{F: 10000, T: 10000000}, {F: 100, T: 10010000}, {F: 130, T: 10020000}},
+								Metric: labels.FromStrings("__name__", "http_requests", "job", "api-server", "instance", "0", "group", "production"),
+							},
+						},
+						nil,
+					},
+					Start: time.Unix(10020, 0),
+				},
+				{ // Normal selector. Add 1ms to the range to see the legacy behavior of the previous test.
+					Query: `http_requests{group=~"pro.*",instance="0"}[30s1ms:10s]`,
 					Result: promql.Result{
 						nil,
 						promql.Matrix{
@ -1921,6 +1966,36 @@ func TestSubquerySelector(t *testing.T) {
 				},
 				{
 					Query: `rate(http_requests[1m])[15s:5s]`,
+					Result: promql.Result{
+						nil,
+						promql.Matrix{
+							promql.Series{
+								Floats:   []promql.FPoint{{F: 3, T: 7990000}, {F: 3, T: 7995000}, {F: 3, T: 8000000}},
+								Metric:   labels.FromStrings("job", "api-server", "instance", "0", "group", "canary"),
+								DropName: true,
+							},
+							promql.Series{
+								Floats:   []promql.FPoint{{F: 4, T: 7990000}, {F: 4, T: 7995000}, {F: 4, T: 8000000}},
+								Metric:   labels.FromStrings("job", "api-server", "instance", "1", "group", "canary"),
+								DropName: true,
+							},
+							promql.Series{
+								Floats:   []promql.FPoint{{F: 1, T: 7990000}, {F: 1, T: 7995000}, {F: 1, T: 8000000}},
+								Metric:   labels.FromStrings("job", "api-server", "instance", "0", "group", "production"),
+								DropName: true,
+							},
+							promql.Series{
+								Floats:   []promql.FPoint{{F: 2, T: 7990000}, {F: 2, T: 7995000}, {F: 2, T: 8000000}},
+								Metric:   labels.FromStrings("job", "api-server", "instance", "1", "group", "production"),
+								DropName: true,
+							},
+						},
+						nil,
+					},
+					Start: time.Unix(8000, 0),
+				},
+				{
+					Query: `rate(http_requests[1m])[15s1ms:5s]`, // Add 1ms to the range to see the legacy behavior of the previous test.
 					Result: promql.Result{
 						nil,
 						promql.Matrix{
@ -1951,6 +2026,35 @@ func TestSubquerySelector(t *testing.T) {
 				},
 				{
 					Query: `sum(http_requests{group=~"pro.*"})[30s:10s]`,
+					Result: promql.Result{
+						nil,
+						promql.Matrix{
+							promql.Series{
+								Floats: []promql.FPoint{{F: 300, T: 100000}, {F: 330, T: 110000}, {F: 360, T: 120000}},
+								Metric: labels.EmptyLabels(),
+							},
+						},
+						nil,
+					},
+					Start: time.Unix(120, 0),
+				},
+				{
+					Query: `sum(http_requests{group=~"pro.*"})[30s:10s]`,
+					Result: promql.Result{
+						nil,
+						promql.Matrix{
+							promql.Series{
+								Floats: []promql.FPoint{{F: 300, T: 100000}, {F: 330, T: 110000}, {F: 360, T: 120000}},
+								Metric: labels.EmptyLabels(),
+							},
+						},
+						nil,
+					},
+					Start: time.Unix(121, 0), // 1s later doesn't change the result.
+				},
+				{
+					// Add 1ms to the range to see the legacy behavior of the previous test.
+					Query: `sum(http_requests{group=~"pro.*"})[30s1ms:10s]`,
 					Result: promql.Result{
 						nil,
 						promql.Matrix{
@ -1965,6 +2069,20 @@ func TestSubquerySelector(t *testing.T) {
 				},
 				{
 					Query: `sum(http_requests)[40s:10s]`,
+					Result: promql.Result{
+						nil,
+						promql.Matrix{
+							promql.Series{
+								Floats: []promql.FPoint{{F: 900, T: 90000}, {F: 1000, T: 100000}, {F: 1100, T: 110000}, {F: 1200, T: 120000}},
+								Metric: labels.EmptyLabels(),
+							},
+						},
+						nil,
+					},
+					Start: time.Unix(120, 0),
+				},
+				{
+					Query: `sum(http_requests)[40s1ms:10s]`, // Add 1ms to the range to see the legacy behavior of the previous test.
 					Result: promql.Result{
 						nil,
 						promql.Matrix{
@ -1979,6 +2097,21 @@ func TestSubquerySelector(t *testing.T) {
 				},
 				{
 					Query: `(sum(http_requests{group=~"p.*"})+sum(http_requests{group=~"c.*"}))[20s:5s]`,
+					Result: promql.Result{
+						nil,
+						promql.Matrix{
+							promql.Series{
+								Floats: []promql.FPoint{{F: 1000, T: 105000}, {F: 1100, T: 110000}, {F: 1100, T: 115000}, {F: 1200, T: 120000}},
+								Metric: labels.EmptyLabels(),
+							},
+						},
+						nil,
+					},
+					Start: time.Unix(120, 0),
+				},
+				{
+					// Add 1ms to the range to see the legacy behavior of the previous test.
+					Query: `(sum(http_requests{group=~"p.*"})+sum(http_requests{group=~"c.*"}))[20s1ms:5s]`,
 					Result: promql.Result{
 						nil,
 						promql.Matrix{