prometheus/rules/recording_test.go

276 lines
7.7 KiB
Go
Raw Permalink Normal View History

// Copyright 2013 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package rules
import (
"context"
"testing"
2016-12-29 08:31:14 -08:00
"time"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/timestamp"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/promql/promqltest"
"github.com/prometheus/prometheus/util/teststorage"
"github.com/prometheus/prometheus/util/testutil"
)
var (
ruleEvaluationTime = time.Unix(0, 0).UTC()
exprWithMetricName, _ = parser.ParseExpr(`sort(metric)`)
exprWithoutMetricName, _ = parser.ParseExpr(`sort(metric + metric)`)
)
var ruleEvalTestScenarios = []struct {
name string
ruleLabels labels.Labels
expr parser.Expr
expected promql.Vector
}{
{
name: "no labels in recording rule, metric name in query result",
ruleLabels: labels.EmptyLabels(),
expr: exprWithMetricName,
expected: promql.Vector{
promql.Sample{
Metric: labels.FromStrings("__name__", "test_rule", "label_a", "1", "label_b", "3"),
promql: Separate `Point` into `FPoint` and `HPoint` In other words: Instead of having a “polymorphous” `Point` that can either contain a float value or a histogram value, use an `FPoint` for floats and an `HPoint` for histograms. This seemingly small change has a _lot_ of repercussions throughout the codebase. The idea here is to avoid the increase in size of `Point` arrays that happened after native histograms had been added. The higher-level data structures (`Sample`, `Series`, etc.) are still “polymorphous”. The same idea could be applied to them, but at each step the trade-offs needed to be evaluated. The idea with this change is to do the minimum necessary to get back to pre-histogram performance for functions that do not touch histograms. Here are comparisons for the `changes` function. The test data doesn't include histograms yet. Ideally, there would be no change in the benchmark result at all. First runtime v2.39 compared to directly prior to this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10) ``` And then runtime v2.39 compared to after this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9) ``` In summary, the runtime doesn't really improve with this change for queries with just a few steps. For queries with many steps, this commit essentially reinstates the old performance. This is good because the many-step queries are the one that matter most (longest absolute runtime). In terms of allocations, though, this commit doesn't make a dent at all (numbers not shown). The reason is that most of the allocations happen in the sampleRingIterator (in the storage package), which has to be addressed in a separate commit. Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F: 1,
T: timestamp.FromTime(ruleEvaluationTime),
},
promql.Sample{
Metric: labels.FromStrings("__name__", "test_rule", "label_a", "2", "label_b", "4"),
promql: Separate `Point` into `FPoint` and `HPoint` In other words: Instead of having a “polymorphous” `Point` that can either contain a float value or a histogram value, use an `FPoint` for floats and an `HPoint` for histograms. This seemingly small change has a _lot_ of repercussions throughout the codebase. The idea here is to avoid the increase in size of `Point` arrays that happened after native histograms had been added. The higher-level data structures (`Sample`, `Series`, etc.) are still “polymorphous”. The same idea could be applied to them, but at each step the trade-offs needed to be evaluated. The idea with this change is to do the minimum necessary to get back to pre-histogram performance for functions that do not touch histograms. Here are comparisons for the `changes` function. The test data doesn't include histograms yet. Ideally, there would be no change in the benchmark result at all. First runtime v2.39 compared to directly prior to this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10) ``` And then runtime v2.39 compared to after this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9) ``` In summary, the runtime doesn't really improve with this change for queries with just a few steps. For queries with many steps, this commit essentially reinstates the old performance. This is good because the many-step queries are the one that matter most (longest absolute runtime). In terms of allocations, though, this commit doesn't make a dent at all (numbers not shown). The reason is that most of the allocations happen in the sampleRingIterator (in the storage package), which has to be addressed in a separate commit. Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F: 10,
T: timestamp.FromTime(ruleEvaluationTime),
},
},
},
{
name: "only new labels in recording rule, metric name in query result",
ruleLabels: labels.FromStrings("extra_from_rule", "foo"),
expr: exprWithMetricName,
expected: promql.Vector{
promql.Sample{
Metric: labels.FromStrings("__name__", "test_rule", "label_a", "1", "label_b", "3", "extra_from_rule", "foo"),
promql: Separate `Point` into `FPoint` and `HPoint` In other words: Instead of having a “polymorphous” `Point` that can either contain a float value or a histogram value, use an `FPoint` for floats and an `HPoint` for histograms. This seemingly small change has a _lot_ of repercussions throughout the codebase. The idea here is to avoid the increase in size of `Point` arrays that happened after native histograms had been added. The higher-level data structures (`Sample`, `Series`, etc.) are still “polymorphous”. The same idea could be applied to them, but at each step the trade-offs needed to be evaluated. The idea with this change is to do the minimum necessary to get back to pre-histogram performance for functions that do not touch histograms. Here are comparisons for the `changes` function. The test data doesn't include histograms yet. Ideally, there would be no change in the benchmark result at all. First runtime v2.39 compared to directly prior to this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10) ``` And then runtime v2.39 compared to after this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9) ``` In summary, the runtime doesn't really improve with this change for queries with just a few steps. For queries with many steps, this commit essentially reinstates the old performance. This is good because the many-step queries are the one that matter most (longest absolute runtime). In terms of allocations, though, this commit doesn't make a dent at all (numbers not shown). The reason is that most of the allocations happen in the sampleRingIterator (in the storage package), which has to be addressed in a separate commit. Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F: 1,
T: timestamp.FromTime(ruleEvaluationTime),
},
promql.Sample{
Metric: labels.FromStrings("__name__", "test_rule", "label_a", "2", "label_b", "4", "extra_from_rule", "foo"),
promql: Separate `Point` into `FPoint` and `HPoint` In other words: Instead of having a “polymorphous” `Point` that can either contain a float value or a histogram value, use an `FPoint` for floats and an `HPoint` for histograms. This seemingly small change has a _lot_ of repercussions throughout the codebase. The idea here is to avoid the increase in size of `Point` arrays that happened after native histograms had been added. The higher-level data structures (`Sample`, `Series`, etc.) are still “polymorphous”. The same idea could be applied to them, but at each step the trade-offs needed to be evaluated. The idea with this change is to do the minimum necessary to get back to pre-histogram performance for functions that do not touch histograms. Here are comparisons for the `changes` function. The test data doesn't include histograms yet. Ideally, there would be no change in the benchmark result at all. First runtime v2.39 compared to directly prior to this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10) ``` And then runtime v2.39 compared to after this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9) ``` In summary, the runtime doesn't really improve with this change for queries with just a few steps. For queries with many steps, this commit essentially reinstates the old performance. This is good because the many-step queries are the one that matter most (longest absolute runtime). In terms of allocations, though, this commit doesn't make a dent at all (numbers not shown). The reason is that most of the allocations happen in the sampleRingIterator (in the storage package), which has to be addressed in a separate commit. Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F: 10,
T: timestamp.FromTime(ruleEvaluationTime),
},
},
},
{
name: "some replacement labels in recording rule, metric name in query result",
ruleLabels: labels.FromStrings("label_a", "from_rule"),
expr: exprWithMetricName,
expected: promql.Vector{
promql.Sample{
Metric: labels.FromStrings("__name__", "test_rule", "label_a", "from_rule", "label_b", "3"),
promql: Separate `Point` into `FPoint` and `HPoint` In other words: Instead of having a “polymorphous” `Point` that can either contain a float value or a histogram value, use an `FPoint` for floats and an `HPoint` for histograms. This seemingly small change has a _lot_ of repercussions throughout the codebase. The idea here is to avoid the increase in size of `Point` arrays that happened after native histograms had been added. The higher-level data structures (`Sample`, `Series`, etc.) are still “polymorphous”. The same idea could be applied to them, but at each step the trade-offs needed to be evaluated. The idea with this change is to do the minimum necessary to get back to pre-histogram performance for functions that do not touch histograms. Here are comparisons for the `changes` function. The test data doesn't include histograms yet. Ideally, there would be no change in the benchmark result at all. First runtime v2.39 compared to directly prior to this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10) ``` And then runtime v2.39 compared to after this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9) ``` In summary, the runtime doesn't really improve with this change for queries with just a few steps. For queries with many steps, this commit essentially reinstates the old performance. This is good because the many-step queries are the one that matter most (longest absolute runtime). In terms of allocations, though, this commit doesn't make a dent at all (numbers not shown). The reason is that most of the allocations happen in the sampleRingIterator (in the storage package), which has to be addressed in a separate commit. Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F: 1,
T: timestamp.FromTime(ruleEvaluationTime),
},
promql.Sample{
Metric: labels.FromStrings("__name__", "test_rule", "label_a", "from_rule", "label_b", "4"),
promql: Separate `Point` into `FPoint` and `HPoint` In other words: Instead of having a “polymorphous” `Point` that can either contain a float value or a histogram value, use an `FPoint` for floats and an `HPoint` for histograms. This seemingly small change has a _lot_ of repercussions throughout the codebase. The idea here is to avoid the increase in size of `Point` arrays that happened after native histograms had been added. The higher-level data structures (`Sample`, `Series`, etc.) are still “polymorphous”. The same idea could be applied to them, but at each step the trade-offs needed to be evaluated. The idea with this change is to do the minimum necessary to get back to pre-histogram performance for functions that do not touch histograms. Here are comparisons for the `changes` function. The test data doesn't include histograms yet. Ideally, there would be no change in the benchmark result at all. First runtime v2.39 compared to directly prior to this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10) ``` And then runtime v2.39 compared to after this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9) ``` In summary, the runtime doesn't really improve with this change for queries with just a few steps. For queries with many steps, this commit essentially reinstates the old performance. This is good because the many-step queries are the one that matter most (longest absolute runtime). In terms of allocations, though, this commit doesn't make a dent at all (numbers not shown). The reason is that most of the allocations happen in the sampleRingIterator (in the storage package), which has to be addressed in a separate commit. Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F: 10,
T: timestamp.FromTime(ruleEvaluationTime),
},
},
},
{
name: "no labels in recording rule, no metric name in query result",
ruleLabels: labels.EmptyLabels(),
expr: exprWithoutMetricName,
expected: promql.Vector{
promql.Sample{
Metric: labels.FromStrings("__name__", "test_rule", "label_a", "1", "label_b", "3"),
promql: Separate `Point` into `FPoint` and `HPoint` In other words: Instead of having a “polymorphous” `Point` that can either contain a float value or a histogram value, use an `FPoint` for floats and an `HPoint` for histograms. This seemingly small change has a _lot_ of repercussions throughout the codebase. The idea here is to avoid the increase in size of `Point` arrays that happened after native histograms had been added. The higher-level data structures (`Sample`, `Series`, etc.) are still “polymorphous”. The same idea could be applied to them, but at each step the trade-offs needed to be evaluated. The idea with this change is to do the minimum necessary to get back to pre-histogram performance for functions that do not touch histograms. Here are comparisons for the `changes` function. The test data doesn't include histograms yet. Ideally, there would be no change in the benchmark result at all. First runtime v2.39 compared to directly prior to this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10) ``` And then runtime v2.39 compared to after this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9) ``` In summary, the runtime doesn't really improve with this change for queries with just a few steps. For queries with many steps, this commit essentially reinstates the old performance. This is good because the many-step queries are the one that matter most (longest absolute runtime). In terms of allocations, though, this commit doesn't make a dent at all (numbers not shown). The reason is that most of the allocations happen in the sampleRingIterator (in the storage package), which has to be addressed in a separate commit. Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F: 2,
T: timestamp.FromTime(ruleEvaluationTime),
},
promql.Sample{
Metric: labels.FromStrings("__name__", "test_rule", "label_a", "2", "label_b", "4"),
promql: Separate `Point` into `FPoint` and `HPoint` In other words: Instead of having a “polymorphous” `Point` that can either contain a float value or a histogram value, use an `FPoint` for floats and an `HPoint` for histograms. This seemingly small change has a _lot_ of repercussions throughout the codebase. The idea here is to avoid the increase in size of `Point` arrays that happened after native histograms had been added. The higher-level data structures (`Sample`, `Series`, etc.) are still “polymorphous”. The same idea could be applied to them, but at each step the trade-offs needed to be evaluated. The idea with this change is to do the minimum necessary to get back to pre-histogram performance for functions that do not touch histograms. Here are comparisons for the `changes` function. The test data doesn't include histograms yet. Ideally, there would be no change in the benchmark result at all. First runtime v2.39 compared to directly prior to this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10) ``` And then runtime v2.39 compared to after this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9) ``` In summary, the runtime doesn't really improve with this change for queries with just a few steps. For queries with many steps, this commit essentially reinstates the old performance. This is good because the many-step queries are the one that matter most (longest absolute runtime). In terms of allocations, though, this commit doesn't make a dent at all (numbers not shown). The reason is that most of the allocations happen in the sampleRingIterator (in the storage package), which has to be addressed in a separate commit. Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F: 20,
T: timestamp.FromTime(ruleEvaluationTime),
},
},
},
}
func setUpRuleEvalTest(t require.TestingT) *teststorage.TestStorage {
return promqltest.LoadedStorage(t, `
load 1m
metric{label_a="1",label_b="3"} 1
metric{label_a="2",label_b="4"} 10
`)
}
func TestRuleEval(t *testing.T) {
storage := setUpRuleEvalTest(t)
t.Cleanup(func() { storage.Close() })
for _, scenario := range ruleEvalTestScenarios {
t.Run(scenario.name, func(t *testing.T) {
rule := NewRecordingRule("test_rule", scenario.expr, scenario.ruleLabels)
Feature: Allow configuration of a rule evaluation delay (#14061) * [PATCH] Allow having evaluation delay for rule groups Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Fix lint Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Move the option to ManagerOptions Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Include evaluation_delay in the group config Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix comments Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add a server configuration option. Signed-off-by: gotjosh <josue.abreu@gmail.com> * Appease the linter #1 Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add the new server flag documentation Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve documentation of the new flag and configuration Signed-off-by: gotjosh <josue.abreu@gmail.com> * Use named parameters for clarity on the `Rule` interface Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add `initial` to the flag help Signed-off-by: gotjosh <josue.abreu@gmail.com> * Change the CHANGELOG area from `ruler` to `rules` Signed-off-by: gotjosh <josue.abreu@gmail.com> * Rename evaluation_delay to `rule_query_offset`/`query_offset` and make it a global configuration option. Signed-off-by: gotjosh <josue.abreu@gmail.com> E Your branch is up to date with 'origin/gotjosh/evaluation-delay'. * more docs Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve wording on CHANGELOG Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add `RuleQueryOffset` to the default config in tests in case it changes Signed-off-by: gotjosh <josue.abreu@gmail.com> * Update docs/configuration/recording_rules.md Co-authored-by: Julius Volz <julius.volz@gmail.com> Signed-off-by: gotjosh <josue.abreu@gmail.com> * Rename `RuleQueryOffset` to `QueryOffset` when in the group context. Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve docstring and documentation on the `rule_query_offset` Signed-off-by: gotjosh <josue.abreu@gmail.com> --------- Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: gotjosh <josue.abreu@gmail.com> Co-authored-by: Ganesh Vernekar <ganeshvern@gmail.com> Co-authored-by: Julius Volz <julius.volz@gmail.com>
2024-05-30 03:49:50 -07:00
result, err := rule.Eval(context.TODO(), 0, ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0)
require.NoError(t, err)
testutil.RequireEqual(t, scenario.expected, result)
})
}
}
func BenchmarkRuleEval(b *testing.B) {
storage := setUpRuleEvalTest(b)
b.Cleanup(func() { storage.Close() })
for _, scenario := range ruleEvalTestScenarios {
b.Run(scenario.name, func(b *testing.B) {
rule := NewRecordingRule("test_rule", scenario.expr, scenario.ruleLabels)
b.ResetTimer()
for i := 0; i < b.N; i++ {
Feature: Allow configuration of a rule evaluation delay (#14061) * [PATCH] Allow having evaluation delay for rule groups Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Fix lint Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Move the option to ManagerOptions Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Include evaluation_delay in the group config Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix comments Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add a server configuration option. Signed-off-by: gotjosh <josue.abreu@gmail.com> * Appease the linter #1 Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add the new server flag documentation Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve documentation of the new flag and configuration Signed-off-by: gotjosh <josue.abreu@gmail.com> * Use named parameters for clarity on the `Rule` interface Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add `initial` to the flag help Signed-off-by: gotjosh <josue.abreu@gmail.com> * Change the CHANGELOG area from `ruler` to `rules` Signed-off-by: gotjosh <josue.abreu@gmail.com> * Rename evaluation_delay to `rule_query_offset`/`query_offset` and make it a global configuration option. Signed-off-by: gotjosh <josue.abreu@gmail.com> E Your branch is up to date with 'origin/gotjosh/evaluation-delay'. * more docs Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve wording on CHANGELOG Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add `RuleQueryOffset` to the default config in tests in case it changes Signed-off-by: gotjosh <josue.abreu@gmail.com> * Update docs/configuration/recording_rules.md Co-authored-by: Julius Volz <julius.volz@gmail.com> Signed-off-by: gotjosh <josue.abreu@gmail.com> * Rename `RuleQueryOffset` to `QueryOffset` when in the group context. Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve docstring and documentation on the `rule_query_offset` Signed-off-by: gotjosh <josue.abreu@gmail.com> --------- Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: gotjosh <josue.abreu@gmail.com> Co-authored-by: Ganesh Vernekar <ganeshvern@gmail.com> Co-authored-by: Julius Volz <julius.volz@gmail.com>
2024-05-30 03:49:50 -07:00
_, err := rule.Eval(context.TODO(), 0, ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0)
if err != nil {
require.NoError(b, err)
}
}
})
}
}
// TestRuleEvalDuplicate tests for duplicate labels in recorded metrics, see #5529.
func TestRuleEvalDuplicate(t *testing.T) {
storage := teststorage.New(t)
defer storage.Close()
opts := promql.EngineOpts{
Logger: nil,
Reg: nil,
MaxSamples: 10,
Timeout: 10 * time.Second,
}
engine := promql.NewEngine(opts)
ctx, cancelCtx := context.WithCancel(context.Background())
defer cancelCtx()
now := time.Now()
expr, _ := parser.ParseExpr(`vector(0) or label_replace(vector(0),"test","x","","")`)
rule := NewRecordingRule("foo", expr, labels.FromStrings("test", "test"))
Feature: Allow configuration of a rule evaluation delay (#14061) * [PATCH] Allow having evaluation delay for rule groups Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Fix lint Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Move the option to ManagerOptions Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Include evaluation_delay in the group config Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix comments Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add a server configuration option. Signed-off-by: gotjosh <josue.abreu@gmail.com> * Appease the linter #1 Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add the new server flag documentation Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve documentation of the new flag and configuration Signed-off-by: gotjosh <josue.abreu@gmail.com> * Use named parameters for clarity on the `Rule` interface Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add `initial` to the flag help Signed-off-by: gotjosh <josue.abreu@gmail.com> * Change the CHANGELOG area from `ruler` to `rules` Signed-off-by: gotjosh <josue.abreu@gmail.com> * Rename evaluation_delay to `rule_query_offset`/`query_offset` and make it a global configuration option. Signed-off-by: gotjosh <josue.abreu@gmail.com> E Your branch is up to date with 'origin/gotjosh/evaluation-delay'. * more docs Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve wording on CHANGELOG Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add `RuleQueryOffset` to the default config in tests in case it changes Signed-off-by: gotjosh <josue.abreu@gmail.com> * Update docs/configuration/recording_rules.md Co-authored-by: Julius Volz <julius.volz@gmail.com> Signed-off-by: gotjosh <josue.abreu@gmail.com> * Rename `RuleQueryOffset` to `QueryOffset` when in the group context. Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve docstring and documentation on the `rule_query_offset` Signed-off-by: gotjosh <josue.abreu@gmail.com> --------- Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: gotjosh <josue.abreu@gmail.com> Co-authored-by: Ganesh Vernekar <ganeshvern@gmail.com> Co-authored-by: Julius Volz <julius.volz@gmail.com>
2024-05-30 03:49:50 -07:00
_, err := rule.Eval(ctx, 0, now, EngineQueryFunc(engine, storage), nil, 0)
require.Error(t, err)
require.EqualError(t, err, "vector contains metrics with the same labelset after applying rule labels")
}
func TestRecordingRuleLimit(t *testing.T) {
storage := promqltest.LoadedStorage(t, `
load 1m
metric{label="1"} 1
metric{label="2"} 1
`)
t.Cleanup(func() { storage.Close() })
tests := []struct {
limit int
err string
}{
{
limit: 0,
},
{
limit: -1,
},
{
limit: 2,
},
{
limit: 1,
err: "exceeded limit of 1 with 2 series",
},
}
expr, _ := parser.ParseExpr(`metric > 0`)
rule := NewRecordingRule(
"foo",
expr,
labels.FromStrings("test", "test"),
)
evalTime := time.Unix(0, 0)
for _, test := range tests {
Feature: Allow configuration of a rule evaluation delay (#14061) * [PATCH] Allow having evaluation delay for rule groups Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Fix lint Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Move the option to ManagerOptions Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Include evaluation_delay in the group config Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix comments Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add a server configuration option. Signed-off-by: gotjosh <josue.abreu@gmail.com> * Appease the linter #1 Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add the new server flag documentation Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve documentation of the new flag and configuration Signed-off-by: gotjosh <josue.abreu@gmail.com> * Use named parameters for clarity on the `Rule` interface Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add `initial` to the flag help Signed-off-by: gotjosh <josue.abreu@gmail.com> * Change the CHANGELOG area from `ruler` to `rules` Signed-off-by: gotjosh <josue.abreu@gmail.com> * Rename evaluation_delay to `rule_query_offset`/`query_offset` and make it a global configuration option. Signed-off-by: gotjosh <josue.abreu@gmail.com> E Your branch is up to date with 'origin/gotjosh/evaluation-delay'. * more docs Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve wording on CHANGELOG Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add `RuleQueryOffset` to the default config in tests in case it changes Signed-off-by: gotjosh <josue.abreu@gmail.com> * Update docs/configuration/recording_rules.md Co-authored-by: Julius Volz <julius.volz@gmail.com> Signed-off-by: gotjosh <josue.abreu@gmail.com> * Rename `RuleQueryOffset` to `QueryOffset` when in the group context. Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve docstring and documentation on the `rule_query_offset` Signed-off-by: gotjosh <josue.abreu@gmail.com> --------- Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: gotjosh <josue.abreu@gmail.com> Co-authored-by: Ganesh Vernekar <ganeshvern@gmail.com> Co-authored-by: Julius Volz <julius.volz@gmail.com>
2024-05-30 03:49:50 -07:00
switch _, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); {
case err != nil:
require.EqualError(t, err, test.err)
case test.err != "":
t.Errorf("Expected error %s, got none", test.err)
}
}
}
// TestRecordingEvalWithOrigin checks that the recording rule details are passed through the context.
func TestRecordingEvalWithOrigin(t *testing.T) {
ctx := context.Background()
now := time.Now()
const (
name = "my-recording-rule"
query = `count(metric{foo="bar"})`
)
var (
detail RuleDetail
lbs = labels.FromStrings("foo", "bar")
)
expr, err := parser.ParseExpr(query)
require.NoError(t, err)
rule := NewRecordingRule(name, expr, lbs)
Feature: Allow configuration of a rule evaluation delay (#14061) * [PATCH] Allow having evaluation delay for rule groups Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Fix lint Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Move the option to ManagerOptions Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * [PATCH] Include evaluation_delay in the group config Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> * Fix comments Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add a server configuration option. Signed-off-by: gotjosh <josue.abreu@gmail.com> * Appease the linter #1 Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add the new server flag documentation Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve documentation of the new flag and configuration Signed-off-by: gotjosh <josue.abreu@gmail.com> * Use named parameters for clarity on the `Rule` interface Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add `initial` to the flag help Signed-off-by: gotjosh <josue.abreu@gmail.com> * Change the CHANGELOG area from `ruler` to `rules` Signed-off-by: gotjosh <josue.abreu@gmail.com> * Rename evaluation_delay to `rule_query_offset`/`query_offset` and make it a global configuration option. Signed-off-by: gotjosh <josue.abreu@gmail.com> E Your branch is up to date with 'origin/gotjosh/evaluation-delay'. * more docs Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve wording on CHANGELOG Signed-off-by: gotjosh <josue.abreu@gmail.com> * Add `RuleQueryOffset` to the default config in tests in case it changes Signed-off-by: gotjosh <josue.abreu@gmail.com> * Update docs/configuration/recording_rules.md Co-authored-by: Julius Volz <julius.volz@gmail.com> Signed-off-by: gotjosh <josue.abreu@gmail.com> * Rename `RuleQueryOffset` to `QueryOffset` when in the group context. Signed-off-by: gotjosh <josue.abreu@gmail.com> * Improve docstring and documentation on the `rule_query_offset` Signed-off-by: gotjosh <josue.abreu@gmail.com> --------- Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com> Signed-off-by: gotjosh <josue.abreu@gmail.com> Co-authored-by: Ganesh Vernekar <ganeshvern@gmail.com> Co-authored-by: Julius Volz <julius.volz@gmail.com>
2024-05-30 03:49:50 -07:00
_, err = rule.Eval(ctx, 0, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) {
detail = FromOriginContext(ctx)
return nil, nil
}, nil, 0)
require.NoError(t, err)
require.Equal(t, detail, NewRuleDetail(rule))
}
func TestRecordingRule_SetNoDependentRules(t *testing.T) {
rule := NewRecordingRule("1", &parser.NumberLiteral{Val: 1}, labels.EmptyLabels())
require.False(t, rule.NoDependentRules())
rule.SetNoDependentRules(false)
require.False(t, rule.NoDependentRules())
rule.SetNoDependentRules(true)
require.True(t, rule.NoDependentRules())
}
func TestRecordingRule_SetNoDependencyRules(t *testing.T) {
rule := NewRecordingRule("1", &parser.NumberLiteral{Val: 1}, labels.EmptyLabels())
require.False(t, rule.NoDependencyRules())
rule.SetNoDependencyRules(false)
require.False(t, rule.NoDependencyRules())
rule.SetNoDependencyRules(true)
require.True(t, rule.NoDependencyRules())
}