2015-01-21 11:07:45 -08:00
// Copyright 2013 The Prometheus Authors
2013-02-07 02:49:04 -08:00
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2013-01-07 14:24:26 -08:00
package rules
import (
2017-05-18 09:47:00 -07:00
"context"
2019-12-19 02:41:11 -08:00
"fmt"
2017-05-18 09:47:00 -07:00
"math"
2019-12-19 02:41:11 -08:00
"os"
2017-11-23 04:04:54 -08:00
"sort"
2013-01-07 14:24:26 -08:00
"testing"
2013-03-21 10:06:15 -07:00
"time"
2013-06-25 05:02:27 -07:00
2021-06-11 09:17:59 -07:00
"github.com/go-kit/log"
2020-01-27 04:41:32 -08:00
"github.com/prometheus/client_golang/prometheus"
2015-08-20 08:18:46 -07:00
"github.com/prometheus/common/model"
2020-10-29 02:43:23 -07:00
"github.com/stretchr/testify/require"
2022-07-19 03:58:37 -07:00
"go.uber.org/atomic"
2020-07-27 01:38:08 -07:00
"go.uber.org/goleak"
2022-08-31 06:50:38 -07:00
"gopkg.in/yaml.v2"
2013-06-25 05:02:27 -07:00
2021-11-08 06:23:17 -08:00
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/rulefmt"
"github.com/prometheus/prometheus/model/timestamp"
"github.com/prometheus/prometheus/model/value"
2015-03-30 10:43:19 -07:00
"github.com/prometheus/prometheus/promql"
2020-02-03 10:23:07 -08:00
"github.com/prometheus/prometheus/promql/parser"
2017-05-18 09:47:00 -07:00
"github.com/prometheus/prometheus/storage"
2021-11-28 23:54:23 -08:00
"github.com/prometheus/prometheus/tsdb/chunkenc"
2023-02-10 03:39:33 -08:00
"github.com/prometheus/prometheus/tsdb/tsdbutil"
2019-08-08 18:35:39 -07:00
"github.com/prometheus/prometheus/util/teststorage"
2013-01-07 14:24:26 -08:00
)
2020-07-27 01:38:08 -07:00
// TestMain is the package's test entry point. It hands control to
// goleak.VerifyTestMain, which runs the tests in m.
func TestMain(m *testing.M) {
	goleak.VerifyTestMain(m)
}
2015-06-30 02:51:05 -07:00
func TestAlertingRule ( t * testing . T ) {
2023-08-18 11:48:59 -07:00
storage := promql . LoadedStorage ( t , `
2015-06-30 02:51:05 -07:00
load 5 m
2016-07-12 09:11:31 -07:00
http_requests { job = "app-server" , instance = "0" , group = "canary" , severity = "overwrite-me" } 75 85 95 105 105 95 85
http_requests { job = "app-server" , instance = "1" , group = "canary" , severity = "overwrite-me" } 80 90 100 110 120 130 140
2015-06-30 02:51:05 -07:00
` )
2023-08-18 11:48:59 -07:00
t . Cleanup ( func ( ) { storage . Close ( ) } )
2013-01-07 14:24:26 -08:00
2020-02-03 10:23:07 -08:00
expr , err := parser . ParseExpr ( ` http_requests { group="canary", job="app-server"} < 100 ` )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2013-04-22 15:26:59 -07:00
2015-06-30 02:51:05 -07:00
rule := NewAlertingRule (
"HTTPRequestRateLow" ,
expr ,
time . Minute ,
2023-01-09 03:21:38 -08:00
0 ,
2016-12-29 08:31:14 -08:00
labels . FromStrings ( "severity" , "{{\"c\"}}ritical" ) ,
2022-07-21 09:44:35 -07:00
labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , "" , true , nil ,
2015-06-30 02:51:05 -07:00
)
2017-11-23 04:04:54 -08:00
result := promql . Vector {
2021-11-17 10:57:31 -08:00
promql . Sample {
2017-11-23 04:04:54 -08:00
Metric : labels . FromStrings (
"__name__" , "ALERTS" ,
"alertname" , "HTTPRequestRateLow" ,
"alertstate" , "pending" ,
"group" , "canary" ,
"instance" , "0" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the one that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F : 1 ,
2017-11-23 04:04:54 -08:00
} ,
2021-11-17 10:57:31 -08:00
promql . Sample {
2017-11-23 04:04:54 -08:00
Metric : labels . FromStrings (
"__name__" , "ALERTS" ,
"alertname" , "HTTPRequestRateLow" ,
"alertstate" , "pending" ,
"group" , "canary" ,
"instance" , "1" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the one that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F : 1 ,
2017-11-23 04:04:54 -08:00
} ,
2021-11-17 10:57:31 -08:00
promql . Sample {
2017-11-23 04:04:54 -08:00
Metric : labels . FromStrings (
"__name__" , "ALERTS" ,
"alertname" , "HTTPRequestRateLow" ,
"alertstate" , "firing" ,
"group" , "canary" ,
"instance" , "0" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the one that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F : 1 ,
2017-11-23 04:04:54 -08:00
} ,
2021-11-17 10:57:31 -08:00
promql . Sample {
2017-11-23 04:04:54 -08:00
Metric : labels . FromStrings (
"__name__" , "ALERTS" ,
"alertname" , "HTTPRequestRateLow" ,
"alertstate" , "firing" ,
"group" , "canary" ,
"instance" , "1" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the one that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F : 1 ,
2017-11-23 04:04:54 -08:00
} ,
}
2015-06-30 02:51:05 -07:00
2016-12-29 08:31:14 -08:00
baseTime := time . Unix ( 0 , 0 )
2021-10-22 01:06:44 -07:00
tests := [ ] struct {
2015-06-30 02:51:05 -07:00
time time . Duration
2017-11-23 04:04:54 -08:00
result promql . Vector
2015-06-30 02:51:05 -07:00
} {
{
2017-11-23 04:04:54 -08:00
time : 0 ,
result : result [ : 2 ] ,
2021-10-22 01:06:44 -07:00
} ,
{
2017-11-23 04:04:54 -08:00
time : 5 * time . Minute ,
result : result [ 2 : ] ,
2021-10-22 01:06:44 -07:00
} ,
{
2017-11-23 04:04:54 -08:00
time : 10 * time . Minute ,
result : result [ 2 : 3 ] ,
2015-03-30 10:43:19 -07:00
} ,
2013-04-24 02:51:40 -07:00
{
2017-05-19 09:02:25 -07:00
time : 15 * time . Minute ,
2017-11-23 04:04:54 -08:00
result : nil ,
2013-04-24 02:51:40 -07:00
} ,
{
2015-06-30 02:51:05 -07:00
time : 20 * time . Minute ,
2017-11-23 04:04:54 -08:00
result : nil ,
2016-02-04 20:42:55 -08:00
} ,
{
2017-11-23 04:04:54 -08:00
time : 25 * time . Minute ,
result : result [ : 1 ] ,
2016-02-04 20:42:55 -08:00
} ,
{
2017-11-23 04:04:54 -08:00
time : 30 * time . Minute ,
result : result [ 2 : 3 ] ,
2013-04-24 02:51:40 -07:00
} ,
}
2015-03-30 10:43:19 -07:00
2015-06-30 02:51:05 -07:00
for i , test := range tests {
2017-11-23 04:04:54 -08:00
t . Logf ( "case %d" , i )
2016-12-29 08:31:14 -08:00
evalTime := baseTime . Add ( test . time )
2015-03-30 10:43:19 -07:00
2023-08-18 11:48:59 -07:00
res , err := rule . Eval ( context . TODO ( ) , evalTime , EngineQueryFunc ( testEngine , storage ) , nil , 0 )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2015-03-30 10:43:19 -07:00
2018-08-02 03:18:24 -07:00
var filteredRes promql . Vector // After removing 'ALERTS_FOR_STATE' samples.
for _ , smpl := range res {
smplName := smpl . Metric . Get ( "__name__" )
if smplName == "ALERTS" {
filteredRes = append ( filteredRes , smpl )
} else {
// If not 'ALERTS', it has to be 'ALERTS_FOR_STATE'.
2023-12-07 03:35:01 -08:00
require . Equal ( t , "ALERTS_FOR_STATE" , smplName )
2018-08-02 03:18:24 -07:00
}
}
2017-11-23 04:04:54 -08:00
for i := range test . result {
test . result [ i ] . T = timestamp . FromTime ( evalTime )
2013-04-24 02:51:40 -07:00
}
2020-10-29 02:43:23 -07:00
require . Equal ( t , len ( test . result ) , len ( filteredRes ) , "%d. Number of samples in expected and actual output don't match (%d vs. %d)" , i , len ( test . result ) , len ( res ) )
2017-11-23 04:04:54 -08:00
2018-08-02 03:18:24 -07:00
sort . Slice ( filteredRes , func ( i , j int ) bool {
return labels . Compare ( filteredRes [ i ] . Metric , filteredRes [ j ] . Metric ) < 0
2017-11-23 04:04:54 -08:00
} )
2020-10-29 02:43:23 -07:00
require . Equal ( t , test . result , filteredRes )
2016-08-01 15:32:01 -07:00
for _ , aa := range rule . ActiveAlerts ( ) {
2020-10-29 02:43:23 -07:00
require . Zero ( t , aa . Labels . Get ( model . MetricNameLabel ) , "%s label set on active alert: %s" , model . MetricNameLabel , aa . Labels )
2016-08-01 15:32:01 -07:00
}
2013-04-24 02:51:40 -07:00
}
}
2015-06-30 02:51:05 -07:00
2018-08-02 03:18:24 -07:00
func TestForStateAddSamples ( t * testing . T ) {
2023-08-18 11:48:59 -07:00
storage := promql . LoadedStorage ( t , `
2018-08-02 03:18:24 -07:00
load 5 m
http_requests { job = "app-server" , instance = "0" , group = "canary" , severity = "overwrite-me" } 75 85 95 105 105 95 85
http_requests { job = "app-server" , instance = "1" , group = "canary" , severity = "overwrite-me" } 80 90 100 110 120 130 140
` )
2023-08-18 11:48:59 -07:00
t . Cleanup ( func ( ) { storage . Close ( ) } )
2018-08-02 03:18:24 -07:00
2020-02-03 10:23:07 -08:00
expr , err := parser . ParseExpr ( ` http_requests { group="canary", job="app-server"} < 100 ` )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2018-08-02 03:18:24 -07:00
rule := NewAlertingRule (
"HTTPRequestRateLow" ,
expr ,
time . Minute ,
2023-01-09 03:21:38 -08:00
0 ,
2018-08-02 03:18:24 -07:00
labels . FromStrings ( "severity" , "{{\"c\"}}ritical" ) ,
2022-07-21 09:44:35 -07:00
labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , "" , true , nil ,
2018-08-02 03:18:24 -07:00
)
result := promql . Vector {
2021-11-17 10:57:31 -08:00
promql . Sample {
2018-08-02 03:18:24 -07:00
Metric : labels . FromStrings (
"__name__" , "ALERTS_FOR_STATE" ,
"alertname" , "HTTPRequestRateLow" ,
"group" , "canary" ,
"instance" , "0" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the one that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F : 1 ,
2018-08-02 03:18:24 -07:00
} ,
2021-11-17 10:57:31 -08:00
promql . Sample {
2018-08-02 03:18:24 -07:00
Metric : labels . FromStrings (
"__name__" , "ALERTS_FOR_STATE" ,
"alertname" , "HTTPRequestRateLow" ,
"group" , "canary" ,
"instance" , "1" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the ones that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F : 1 ,
2018-08-02 03:18:24 -07:00
} ,
2021-11-17 10:57:31 -08:00
promql . Sample {
2018-08-02 03:18:24 -07:00
Metric : labels . FromStrings (
"__name__" , "ALERTS_FOR_STATE" ,
"alertname" , "HTTPRequestRateLow" ,
"group" , "canary" ,
"instance" , "0" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the ones that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F : 1 ,
2018-08-02 03:18:24 -07:00
} ,
2021-11-17 10:57:31 -08:00
promql . Sample {
2018-08-02 03:18:24 -07:00
Metric : labels . FromStrings (
"__name__" , "ALERTS_FOR_STATE" ,
"alertname" , "HTTPRequestRateLow" ,
"group" , "canary" ,
"instance" , "1" ,
"job" , "app-server" ,
"severity" , "critical" ,
) ,
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the ones that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F : 1 ,
2018-08-02 03:18:24 -07:00
} ,
}
baseTime := time . Unix ( 0 , 0 )
2021-10-22 01:06:44 -07:00
tests := [ ] struct {
2018-08-02 03:18:24 -07:00
time time . Duration
result promql . Vector
persistThisTime bool // If true, it means this 'time' is persisted for 'for'.
} {
{
time : 0 ,
result : append ( promql . Vector { } , result [ : 2 ] ... ) ,
persistThisTime : true ,
} ,
{
time : 5 * time . Minute ,
result : append ( promql . Vector { } , result [ 2 : ] ... ) ,
} ,
{
time : 10 * time . Minute ,
result : append ( promql . Vector { } , result [ 2 : 3 ] ... ) ,
} ,
{
time : 15 * time . Minute ,
result : nil ,
} ,
{
time : 20 * time . Minute ,
result : nil ,
} ,
{
time : 25 * time . Minute ,
result : append ( promql . Vector { } , result [ : 1 ] ... ) ,
persistThisTime : true ,
} ,
{
time : 30 * time . Minute ,
result : append ( promql . Vector { } , result [ 2 : 3 ] ... ) ,
} ,
}
var forState float64
for i , test := range tests {
t . Logf ( "case %d" , i )
evalTime := baseTime . Add ( test . time )
if test . persistThisTime {
forState = float64 ( evalTime . Unix ( ) )
}
if test . result == nil {
forState = float64 ( value . StaleNaN )
}
2023-08-18 11:48:59 -07:00
res , err := rule . Eval ( context . TODO ( ) , evalTime , EngineQueryFunc ( testEngine , storage ) , nil , 0 )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2018-08-02 03:18:24 -07:00
var filteredRes promql . Vector // After removing 'ALERTS' samples.
for _ , smpl := range res {
smplName := smpl . Metric . Get ( "__name__" )
if smplName == "ALERTS_FOR_STATE" {
filteredRes = append ( filteredRes , smpl )
} else {
// If not 'ALERTS_FOR_STATE', it has to be 'ALERTS'.
2023-12-07 03:35:01 -08:00
require . Equal ( t , "ALERTS" , smplName )
2018-08-02 03:18:24 -07:00
}
}
for i := range test . result {
test . result [ i ] . T = timestamp . FromTime ( evalTime )
// Updating the expected 'for' state.
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the ones that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
if test . result [ i ] . F >= 0 {
test . result [ i ] . F = forState
2018-08-02 03:18:24 -07:00
}
}
2020-10-29 02:43:23 -07:00
require . Equal ( t , len ( test . result ) , len ( filteredRes ) , "%d. Number of samples in expected and actual output don't match (%d vs. %d)" , i , len ( test . result ) , len ( res ) )
2018-08-02 03:18:24 -07:00
sort . Slice ( filteredRes , func ( i , j int ) bool {
return labels . Compare ( filteredRes [ i ] . Metric , filteredRes [ j ] . Metric ) < 0
} )
2020-10-29 02:43:23 -07:00
require . Equal ( t , test . result , filteredRes )
2018-08-02 03:18:24 -07:00
for _ , aa := range rule . ActiveAlerts ( ) {
2020-10-29 02:43:23 -07:00
require . Zero ( t , aa . Labels . Get ( model . MetricNameLabel ) , "%s label set on active alert: %s" , model . MetricNameLabel , aa . Labels )
2018-08-02 03:18:24 -07:00
}
}
}
2018-08-16 10:26:15 -07:00
// sortAlerts sorts items in place, in ascending order of their label sets as
// defined by labels.Compare.
func sortAlerts(items []*Alert) {
	sort.Slice(items, func(i, j int) bool {
		// The comparator must be a strict "less than": reporting true for two
		// equal label sets (as `<= 0` would) breaks the ordering contract
		// that sort.Slice relies on.
		return labels.Compare(items[i].Labels, items[j].Labels) < 0
	})
}
2018-08-02 03:18:24 -07:00
2018-08-16 10:26:15 -07:00
func TestForStateRestore ( t * testing . T ) {
2023-08-18 11:48:59 -07:00
storage := promql . LoadedStorage ( t , `
2018-08-02 03:18:24 -07:00
load 5 m
http_requests { job = "app-server" , instance = "0" , group = "canary" , severity = "overwrite-me" } 75 85 50 0 0 25 0 0 40 0 120
http_requests { job = "app-server" , instance = "1" , group = "canary" , severity = "overwrite-me" } 125 90 60 0 0 25 0 0 40 0 130
` )
2023-08-18 11:48:59 -07:00
t . Cleanup ( func ( ) { storage . Close ( ) } )
2018-08-02 03:18:24 -07:00
2020-02-03 10:23:07 -08:00
expr , err := parser . ParseExpr ( ` http_requests { group="canary", job="app-server"} < 100 ` )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2018-08-02 03:18:24 -07:00
opts := & ManagerOptions {
2023-08-18 11:48:59 -07:00
QueryFunc : EngineQueryFunc ( testEngine , storage ) ,
Appendable : storage ,
Queryable : storage ,
2018-08-04 12:31:12 -07:00
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
NotifyFunc : func ( ctx context . Context , expr string , alerts ... * Alert ) { } ,
2018-08-02 03:18:24 -07:00
OutageTolerance : 30 * time . Minute ,
ForGracePeriod : 10 * time . Minute ,
}
alertForDuration := 25 * time . Minute
// Initial run before prometheus goes down.
rule := NewAlertingRule (
"HTTPRequestRateLow" ,
expr ,
alertForDuration ,
2023-01-09 03:21:38 -08:00
0 ,
2018-08-02 03:18:24 -07:00
labels . FromStrings ( "severity" , "critical" ) ,
2022-07-21 09:44:35 -07:00
labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , "" , true , nil ,
2018-08-02 03:18:24 -07:00
)
2020-02-12 07:22:18 -08:00
group := NewGroup ( GroupOptions {
Name : "default" ,
Interval : time . Second ,
Rules : [ ] Rule { rule } ,
ShouldRestore : true ,
Opts : opts ,
} )
2018-08-02 03:18:24 -07:00
groups := make ( map [ string ] * Group )
groups [ "default;" ] = group
initialRuns := [ ] time . Duration { 0 , 5 * time . Minute }
baseTime := time . Unix ( 0 , 0 )
for _ , duration := range initialRuns {
evalTime := baseTime . Add ( duration )
2023-08-18 11:48:59 -07:00
group . Eval ( context . TODO ( ) , evalTime )
2018-08-02 03:18:24 -07:00
}
exp := rule . ActiveAlerts ( )
for _ , aa := range exp {
2020-10-29 02:43:23 -07:00
require . Zero ( t , aa . Labels . Get ( model . MetricNameLabel ) , "%s label set on active alert: %s" , model . MetricNameLabel , aa . Labels )
2018-08-02 03:18:24 -07:00
}
sort . Slice ( exp , func ( i , j int ) bool {
return labels . Compare ( exp [ i ] . Labels , exp [ j ] . Labels ) < 0
} )
// Prometheus goes down here. We create new rules and groups.
type testInput struct {
restoreDuration time . Duration
alerts [ ] * Alert
num int
noRestore bool
gracePeriod bool
downDuration time . Duration
}
tests := [ ] testInput {
{
// Normal restore (alerts were not firing).
2018-08-16 10:26:15 -07:00
restoreDuration : 15 * time . Minute ,
2018-08-02 03:18:24 -07:00
alerts : rule . ActiveAlerts ( ) ,
2018-08-16 10:26:15 -07:00
downDuration : 10 * time . Minute ,
2018-08-02 03:18:24 -07:00
} ,
{
// Testing Outage Tolerance.
restoreDuration : 40 * time . Minute ,
noRestore : true ,
num : 2 ,
} ,
{
// No active alerts.
restoreDuration : 50 * time . Minute ,
alerts : [ ] * Alert { } ,
} ,
}
testFunc := func ( tst testInput ) {
newRule := NewAlertingRule (
"HTTPRequestRateLow" ,
expr ,
alertForDuration ,
2023-01-09 03:21:38 -08:00
0 ,
2018-08-02 03:18:24 -07:00
labels . FromStrings ( "severity" , "critical" ) ,
2022-07-21 09:44:35 -07:00
labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , "" , false , nil ,
2018-08-02 03:18:24 -07:00
)
2020-02-12 07:22:18 -08:00
newGroup := NewGroup ( GroupOptions {
Name : "default" ,
Interval : time . Second ,
Rules : [ ] Rule { newRule } ,
ShouldRestore : true ,
Opts : opts ,
} )
2018-08-02 03:18:24 -07:00
newGroups := make ( map [ string ] * Group )
newGroups [ "default;" ] = newGroup
restoreTime := baseTime . Add ( tst . restoreDuration )
// First eval before restoration.
2023-08-18 11:48:59 -07:00
newGroup . Eval ( context . TODO ( ) , restoreTime )
2018-08-02 03:18:24 -07:00
// Restore happens here.
newGroup . RestoreForState ( restoreTime )
got := newRule . ActiveAlerts ( )
for _ , aa := range got {
2020-10-29 02:43:23 -07:00
require . Zero ( t , aa . Labels . Get ( model . MetricNameLabel ) , "%s label set on active alert: %s" , model . MetricNameLabel , aa . Labels )
2018-08-02 03:18:24 -07:00
}
sort . Slice ( got , func ( i , j int ) bool {
return labels . Compare ( got [ i ] . Labels , got [ j ] . Labels ) < 0
} )
// Checking if we have restored it correctly.
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
switch {
case tst . noRestore :
2023-12-07 03:35:01 -08:00
require . Len ( t , got , tst . num )
2018-08-02 03:18:24 -07:00
for _ , e := range got {
2020-10-29 02:43:23 -07:00
require . Equal ( t , e . ActiveAt , restoreTime )
2018-08-02 03:18:24 -07:00
}
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
case tst . gracePeriod :
2023-12-07 03:35:01 -08:00
require . Len ( t , got , tst . num )
2018-08-02 03:18:24 -07:00
for _ , e := range got {
2020-10-29 02:43:23 -07:00
require . Equal ( t , opts . ForGracePeriod , e . ActiveAt . Add ( alertForDuration ) . Sub ( restoreTime ) )
2018-08-02 03:18:24 -07:00
}
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 07:14:31 -07:00
default :
2018-08-02 03:18:24 -07:00
exp := tst . alerts
2020-10-29 02:43:23 -07:00
require . Equal ( t , len ( exp ) , len ( got ) )
2018-08-16 10:26:15 -07:00
sortAlerts ( exp )
sortAlerts ( got )
2018-08-02 03:18:24 -07:00
for i , e := range exp {
2020-10-29 02:43:23 -07:00
require . Equal ( t , e . Labels , got [ i ] . Labels )
2018-08-02 03:18:24 -07:00
// Difference in time should be within 1e6 ns, i.e. 1ms
// (due to conversion between ns & ms, float64 & int64).
activeAtDiff := float64 ( e . ActiveAt . Unix ( ) + int64 ( tst . downDuration / time . Second ) - got [ i ] . ActiveAt . Unix ( ) )
2020-10-29 02:43:23 -07:00
require . Equal ( t , 0.0 , math . Abs ( activeAtDiff ) , "'for' state restored time is wrong" )
2018-08-02 03:18:24 -07:00
}
}
}
for _ , tst := range tests {
testFunc ( tst )
}
// Testing the grace period.
for _ , duration := range [ ] time . Duration { 10 * time . Minute , 15 * time . Minute , 20 * time . Minute } {
evalTime := baseTime . Add ( duration )
2023-08-18 11:48:59 -07:00
group . Eval ( context . TODO ( ) , evalTime )
2018-08-02 03:18:24 -07:00
}
testFunc ( testInput {
restoreDuration : 25 * time . Minute ,
alerts : [ ] * Alert { } ,
gracePeriod : true ,
num : 2 ,
} )
}
2017-05-18 09:47:00 -07:00
func TestStaleness ( t * testing . T ) {
2020-03-12 02:36:09 -07:00
st := teststorage . New ( t )
defer st . Close ( )
2018-10-02 04:59:19 -07:00
engineOpts := promql . EngineOpts {
2020-01-28 12:38:49 -08:00
Logger : nil ,
Reg : nil ,
MaxSamples : 10 ,
Timeout : 10 * time . Second ,
2018-10-02 04:59:19 -07:00
}
engine := promql . NewEngine ( engineOpts )
2017-05-18 09:47:00 -07:00
opts := & ManagerOptions {
2020-03-12 02:36:09 -07:00
QueryFunc : EngineQueryFunc ( engine , st ) ,
Appendable : st ,
2020-06-26 11:06:36 -07:00
Queryable : st ,
2017-11-23 04:04:54 -08:00
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
2017-05-18 09:47:00 -07:00
}
2020-02-03 10:23:07 -08:00
expr , err := parser . ParseExpr ( "a + 1" )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2017-05-18 09:47:00 -07:00
rule := NewRecordingRule ( "a_plus_one" , expr , labels . Labels { } )
2020-02-12 07:22:18 -08:00
group := NewGroup ( GroupOptions {
Name : "default" ,
Interval : time . Second ,
Rules : [ ] Rule { rule } ,
ShouldRestore : true ,
Opts : opts ,
} )
2017-05-18 09:47:00 -07:00
// A time series that has two samples and then goes stale.
2020-07-24 07:10:51 -07:00
app := st . Appender ( context . Background ( ) )
2021-02-18 04:07:00 -08:00
app . Append ( 0 , labels . FromStrings ( model . MetricNameLabel , "a" ) , 0 , 1 )
app . Append ( 0 , labels . FromStrings ( model . MetricNameLabel , "a" ) , 1000 , 2 )
app . Append ( 0 , labels . FromStrings ( model . MetricNameLabel , "a" ) , 2000 , math . Float64frombits ( value . StaleNaN ) )
2017-11-11 02:29:47 -08:00
err = app . Commit ( )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2017-05-18 09:47:00 -07:00
2017-11-23 23:59:05 -08:00
ctx := context . Background ( )
2017-05-18 09:47:00 -07:00
// Execute 3 times, 1 second apart.
2017-11-23 23:59:05 -08:00
group . Eval ( ctx , time . Unix ( 0 , 0 ) )
group . Eval ( ctx , time . Unix ( 1 , 0 ) )
group . Eval ( ctx , time . Unix ( 2 , 0 ) )
2017-05-18 09:47:00 -07:00
2023-09-12 03:37:38 -07:00
querier , err := st . Querier ( 0 , 2000 )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2017-10-09 09:03:33 -07:00
defer querier . Close ( )
2017-11-23 04:50:06 -08:00
matcher , err := labels . NewMatcher ( labels . MatchEqual , model . MetricNameLabel , "a_plus_one" )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2017-11-23 04:50:06 -08:00
2023-09-12 03:37:38 -07:00
set := querier . Select ( ctx , false , nil , matcher )
2017-11-23 04:50:06 -08:00
samples , err := readSeriesSet ( set )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2017-11-23 04:50:06 -08:00
2017-05-18 09:47:00 -07:00
metric := labels . FromStrings ( model . MetricNameLabel , "a_plus_one" ) . String ( )
metricSample , ok := samples [ metric ]
2017-11-11 02:29:47 -08:00
2020-10-29 02:43:23 -07:00
require . True ( t , ok , "Series %s not returned." , metric )
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the one that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
require . True ( t , value . IsStaleNaN ( metricSample [ 2 ] . F ) , "Appended second sample not as expected. Wanted: stale NaN Got: %x" , math . Float64bits ( metricSample [ 2 ] . F ) )
metricSample [ 2 ] . F = 42 // require.Equal cannot handle NaN.
2017-05-18 09:47:00 -07:00
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the one that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
want := map [ string ] [ ] promql . FPoint {
metric : { { T : 0 , F : 2 } , { T : 1000 , F : 3 } , { T : 2000 , F : 42 } } ,
2017-05-18 09:47:00 -07:00
}
2020-10-29 02:43:23 -07:00
require . Equal ( t , want , samples )
2017-05-18 09:47:00 -07:00
}
2020-10-29 02:43:23 -07:00
// Convert a SeriesSet into a form usable with require.Equal.
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the one that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
func readSeriesSet ( ss storage . SeriesSet ) ( map [ string ] [ ] promql . FPoint , error ) {
result := map [ string ] [ ] promql . FPoint { }
2022-09-20 10:16:45 -07:00
var it chunkenc . Iterator
2017-05-18 09:47:00 -07:00
for ss . Next ( ) {
series := ss . At ( )
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the one that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
points := [ ] promql . FPoint { }
2022-09-20 10:16:45 -07:00
it := series . Iterator ( it )
2021-11-28 23:54:23 -08:00
for it . Next ( ) == chunkenc . ValFloat {
2017-05-18 09:47:00 -07:00
t , v := it . At ( )
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the one that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
points = append ( points , promql . FPoint { T : t , F : v } )
2017-05-18 09:47:00 -07:00
}
name := series . Labels ( ) . String ( )
result [ name ] = points
}
2017-05-19 08:43:59 -07:00
return result , ss . Err ( )
}
func TestCopyState ( t * testing . T ) {
oldGroup := & Group {
rules : [ ] Rule {
2023-01-09 03:21:38 -08:00
NewAlertingRule ( "alert" , nil , 0 , 0 , labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , "" , true , nil ) ,
2022-07-21 09:44:35 -07:00
NewRecordingRule ( "rule1" , nil , labels . EmptyLabels ( ) ) ,
NewRecordingRule ( "rule2" , nil , labels . EmptyLabels ( ) ) ,
NewRecordingRule ( "rule3" , nil , labels . FromStrings ( "l1" , "v1" ) ) ,
NewRecordingRule ( "rule3" , nil , labels . FromStrings ( "l1" , "v2" ) ) ,
NewRecordingRule ( "rule3" , nil , labels . FromStrings ( "l1" , "v3" ) ) ,
2023-01-09 03:21:38 -08:00
NewAlertingRule ( "alert2" , nil , 0 , 0 , labels . FromStrings ( "l2" , "v1" ) , labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , "" , true , nil ) ,
2017-05-19 08:43:59 -07:00
} ,
seriesInPreviousEval : [ ] map [ string ] labels . Labels {
2019-08-07 08:11:05 -07:00
{ } ,
{ } ,
{ } ,
2022-07-21 09:44:35 -07:00
{ "r3a" : labels . FromStrings ( "l1" , "v1" ) } ,
{ "r3b" : labels . FromStrings ( "l1" , "v2" ) } ,
{ "r3c" : labels . FromStrings ( "l1" , "v3" ) } ,
{ "a2" : labels . FromStrings ( "l2" , "v1" ) } ,
2017-05-19 08:43:59 -07:00
} ,
2020-08-25 03:38:06 -07:00
evaluationTime : time . Second ,
2017-05-19 08:43:59 -07:00
}
oldGroup . rules [ 0 ] . ( * AlertingRule ) . active [ 42 ] = nil
newGroup := & Group {
rules : [ ] Rule {
2022-07-21 09:44:35 -07:00
NewRecordingRule ( "rule3" , nil , labels . FromStrings ( "l1" , "v0" ) ) ,
NewRecordingRule ( "rule3" , nil , labels . FromStrings ( "l1" , "v1" ) ) ,
NewRecordingRule ( "rule3" , nil , labels . FromStrings ( "l1" , "v2" ) ) ,
2023-01-09 03:21:38 -08:00
NewAlertingRule ( "alert" , nil , 0 , 0 , labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , "" , true , nil ) ,
2022-07-21 09:44:35 -07:00
NewRecordingRule ( "rule1" , nil , labels . EmptyLabels ( ) ) ,
2023-01-09 03:21:38 -08:00
NewAlertingRule ( "alert2" , nil , 0 , 0 , labels . FromStrings ( "l2" , "v0" ) , labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , "" , true , nil ) ,
NewAlertingRule ( "alert2" , nil , 0 , 0 , labels . FromStrings ( "l2" , "v1" ) , labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , "" , true , nil ) ,
2022-07-21 09:44:35 -07:00
NewRecordingRule ( "rule4" , nil , labels . EmptyLabels ( ) ) ,
2017-05-19 08:43:59 -07:00
} ,
2019-03-15 08:23:36 -07:00
seriesInPreviousEval : make ( [ ] map [ string ] labels . Labels , 8 ) ,
2017-05-19 08:43:59 -07:00
}
2018-07-18 06:14:38 -07:00
newGroup . CopyState ( oldGroup )
2017-05-19 08:43:59 -07:00
want := [ ] map [ string ] labels . Labels {
nil ,
2022-07-21 09:44:35 -07:00
{ "r3a" : labels . FromStrings ( "l1" , "v1" ) } ,
{ "r3b" : labels . FromStrings ( "l1" , "v2" ) } ,
2019-08-07 08:11:05 -07:00
{ } ,
{ } ,
2017-05-19 08:43:59 -07:00
nil ,
2022-07-21 09:44:35 -07:00
{ "a2" : labels . FromStrings ( "l2" , "v1" ) } ,
2019-03-15 08:23:36 -07:00
nil ,
2017-05-19 08:43:59 -07:00
}
2020-10-29 02:43:23 -07:00
require . Equal ( t , want , newGroup . seriesInPreviousEval )
require . Equal ( t , oldGroup . rules [ 0 ] , newGroup . rules [ 3 ] )
require . Equal ( t , oldGroup . evaluationTime , newGroup . evaluationTime )
require . Equal ( t , oldGroup . lastEvaluation , newGroup . lastEvaluation )
2022-07-21 09:44:35 -07:00
require . Equal ( t , [ ] labels . Labels { labels . FromStrings ( "l1" , "v3" ) } , newGroup . staleSeries )
2019-08-07 08:11:05 -07:00
}
func TestDeletedRuleMarkedStale ( t * testing . T ) {
2020-03-12 02:36:09 -07:00
st := teststorage . New ( t )
defer st . Close ( )
2019-08-07 08:11:05 -07:00
oldGroup := & Group {
rules : [ ] Rule {
2022-07-21 09:44:35 -07:00
NewRecordingRule ( "rule1" , nil , labels . FromStrings ( "l1" , "v1" ) ) ,
2019-08-07 08:11:05 -07:00
} ,
seriesInPreviousEval : [ ] map [ string ] labels . Labels {
2022-07-21 09:44:35 -07:00
{ "r1" : labels . FromStrings ( "l1" , "v1" ) } ,
2019-08-07 08:11:05 -07:00
} ,
}
newGroup := & Group {
rules : [ ] Rule { } ,
seriesInPreviousEval : [ ] map [ string ] labels . Labels { } ,
opts : & ManagerOptions {
2020-03-12 02:36:09 -07:00
Appendable : st ,
2019-08-07 08:11:05 -07:00
} ,
}
newGroup . CopyState ( oldGroup )
newGroup . Eval ( context . Background ( ) , time . Unix ( 0 , 0 ) )
2023-09-12 03:37:38 -07:00
querier , err := st . Querier ( 0 , 2000 )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-08-07 08:11:05 -07:00
defer querier . Close ( )
matcher , err := labels . NewMatcher ( labels . MatchEqual , "l1" , "v1" )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-08-07 08:11:05 -07:00
2023-09-12 03:37:38 -07:00
set := querier . Select ( context . Background ( ) , false , nil , matcher )
2019-08-07 08:11:05 -07:00
samples , err := readSeriesSet ( set )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-08-07 08:11:05 -07:00
metric := labels . FromStrings ( "l1" , "v1" ) . String ( )
metricSample , ok := samples [ metric ]
2020-10-29 02:43:23 -07:00
require . True ( t , ok , "Series %s not returned." , metric )
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the one that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
require . True ( t , value . IsStaleNaN ( metricSample [ 0 ] . F ) , "Appended sample not as expected. Wanted: stale NaN Got: %x" , math . Float64bits ( metricSample [ 0 ] . F ) )
2017-05-18 09:47:00 -07:00
}
2017-11-01 04:58:00 -07:00
2017-11-23 06:48:14 -08:00
func TestUpdate ( t * testing . T ) {
2018-06-22 07:21:04 -07:00
files := [ ] string { "fixtures/rules.yaml" }
2017-11-01 04:58:00 -07:00
expected := map [ string ] labels . Labels {
2017-11-23 06:48:14 -08:00
"test" : labels . FromStrings ( "name" , "value" ) ,
2017-11-01 04:58:00 -07:00
}
2020-03-12 02:36:09 -07:00
st := teststorage . New ( t )
defer st . Close ( )
2018-10-02 04:59:19 -07:00
opts := promql . EngineOpts {
2020-01-28 12:38:49 -08:00
Logger : nil ,
Reg : nil ,
MaxSamples : 10 ,
Timeout : 10 * time . Second ,
2018-10-02 04:59:19 -07:00
}
engine := promql . NewEngine ( opts )
2017-11-01 04:58:00 -07:00
ruleManager := NewManager ( & ManagerOptions {
2020-03-12 02:36:09 -07:00
Appendable : st ,
2020-06-26 11:06:36 -07:00
Queryable : st ,
2020-03-12 02:36:09 -07:00
QueryFunc : EngineQueryFunc ( engine , st ) ,
2018-08-20 05:51:05 -07:00
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
2017-11-01 04:58:00 -07:00
} )
2020-07-21 15:13:24 -07:00
ruleManager . start ( )
2018-08-20 05:51:05 -07:00
defer ruleManager . Stop ( )
2017-11-01 04:58:00 -07:00
2022-07-21 09:44:35 -07:00
err := ruleManager . Update ( 10 * time . Second , files , labels . EmptyLabels ( ) , "" , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2023-12-07 03:35:01 -08:00
require . NotEmpty ( t , ruleManager . groups , "expected non-empty rule groups" )
2019-12-19 02:41:11 -08:00
ogs := map [ string ] * Group { }
for h , g := range ruleManager . groups {
2017-11-01 04:58:00 -07:00
g . seriesInPreviousEval = [ ] map [ string ] labels . Labels {
expected ,
}
2019-12-19 02:41:11 -08:00
ogs [ h ] = g
2017-11-01 04:58:00 -07:00
}
2022-07-21 09:44:35 -07:00
err = ruleManager . Update ( 10 * time . Second , files , labels . EmptyLabels ( ) , "" , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-12-19 02:41:11 -08:00
for h , g := range ruleManager . groups {
2017-11-01 04:58:00 -07:00
for _ , actual := range g . seriesInPreviousEval {
2020-10-29 02:43:23 -07:00
require . Equal ( t , expected , actual )
2017-11-01 04:58:00 -07:00
}
2019-12-19 02:41:11 -08:00
// Groups are the same because of no updates.
2020-10-29 02:43:23 -07:00
require . Equal ( t , ogs [ h ] , g )
2019-12-19 02:41:11 -08:00
}
// Groups will be recreated if updated.
rgs , errs := rulefmt . ParseFile ( "fixtures/rules.yaml" )
2023-12-07 03:35:01 -08:00
require . Empty ( t , errs , "file parsing failures" )
2019-12-19 02:41:11 -08:00
2022-04-27 02:24:36 -07:00
tmpFile , err := os . CreateTemp ( "" , "rules.test.*.yaml" )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-12-19 02:41:11 -08:00
defer os . Remove ( tmpFile . Name ( ) )
defer tmpFile . Close ( )
2022-07-21 09:44:35 -07:00
err = ruleManager . Update ( 10 * time . Second , [ ] string { tmpFile . Name ( ) } , labels . EmptyLabels ( ) , "" , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-12-19 02:41:11 -08:00
for h , g := range ruleManager . groups {
ogs [ h ] = g
}
2019-12-19 02:46:22 -08:00
// Update interval and reload.
2019-12-19 02:41:11 -08:00
for i , g := range rgs . Groups {
if g . Interval != 0 {
rgs . Groups [ i ] . Interval = g . Interval * 2
} else {
rgs . Groups [ i ] . Interval = model . Duration ( 10 )
}
}
2023-04-12 04:05:41 -07:00
reloadAndValidate ( rgs , t , tmpFile , ruleManager , ogs )
2019-12-19 02:41:11 -08:00
2021-09-15 00:48:26 -07:00
// Update limit and reload.
for i := range rgs . Groups {
rgs . Groups [ i ] . Limit = 1
}
2023-04-12 04:05:41 -07:00
reloadAndValidate ( rgs , t , tmpFile , ruleManager , ogs )
2021-09-15 00:48:26 -07:00
2019-12-19 02:46:22 -08:00
// Change group rules and reload.
2019-12-19 02:41:11 -08:00
for i , g := range rgs . Groups {
for j , r := range g . Rules {
2020-01-15 10:07:54 -08:00
rgs . Groups [ i ] . Rules [ j ] . Expr . SetString ( fmt . Sprintf ( "%s * 0" , r . Expr . Value ) )
2019-12-19 02:41:11 -08:00
}
}
2023-04-12 04:05:41 -07:00
reloadAndValidate ( rgs , t , tmpFile , ruleManager , ogs )
2019-12-19 02:41:11 -08:00
}
2020-01-15 10:07:54 -08:00
// ruleGroupsTest is the top-level document that tests marshal to YAML when
// they write a rule file; it only carries the list of groups.
type ruleGroupsTest struct {
	// Groups is emitted under the "groups" key.
	Groups []ruleGroupTest ` yaml:"groups" `
}
// ruleGroupTest forms a testing struct for running tests over rules.
type ruleGroupTest struct {
Name string ` yaml:"name" `
Interval model . Duration ` yaml:"interval,omitempty" `
2021-09-15 00:48:26 -07:00
Limit int ` yaml:"limit,omitempty" `
2020-01-15 10:07:54 -08:00
Rules [ ] rulefmt . Rule ` yaml:"rules" `
}
func formatRules ( r * rulefmt . RuleGroups ) ruleGroupsTest {
grps := r . Groups
tmp := [ ] ruleGroupTest { }
for _ , g := range grps {
rtmp := [ ] rulefmt . Rule { }
for _ , r := range g . Rules {
rtmp = append ( rtmp , rulefmt . Rule {
Record : r . Record . Value ,
Alert : r . Alert . Value ,
Expr : r . Expr . Value ,
For : r . For ,
Labels : r . Labels ,
Annotations : r . Annotations ,
} )
}
tmp = append ( tmp , ruleGroupTest {
Name : g . Name ,
Interval : g . Interval ,
2021-09-15 00:48:26 -07:00
Limit : g . Limit ,
2020-01-15 10:07:54 -08:00
Rules : rtmp ,
} )
}
return ruleGroupsTest {
Groups : tmp ,
}
}
2023-04-12 04:05:41 -07:00
func reloadAndValidate ( rgs * rulefmt . RuleGroups , t * testing . T , tmpFile * os . File , ruleManager * Manager , ogs map [ string ] * Group ) {
2020-01-15 10:07:54 -08:00
bs , err := yaml . Marshal ( formatRules ( rgs ) )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-12-19 02:41:11 -08:00
tmpFile . Seek ( 0 , 0 )
_ , err = tmpFile . Write ( bs )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2022-07-21 09:44:35 -07:00
err = ruleManager . Update ( 10 * time . Second , [ ] string { tmpFile . Name ( ) } , labels . EmptyLabels ( ) , "" , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2019-12-19 02:41:11 -08:00
for h , g := range ruleManager . groups {
if ogs [ h ] == g {
t . Fail ( )
}
ogs [ h ] = g
2017-11-01 04:58:00 -07:00
}
}
2018-08-27 09:41:42 -07:00
func TestNotify ( t * testing . T ) {
2019-08-08 18:35:39 -07:00
storage := teststorage . New ( t )
2018-08-27 09:41:42 -07:00
defer storage . Close ( )
2018-10-02 04:59:19 -07:00
engineOpts := promql . EngineOpts {
2020-01-28 12:38:49 -08:00
Logger : nil ,
Reg : nil ,
MaxSamples : 10 ,
Timeout : 10 * time . Second ,
2018-10-02 04:59:19 -07:00
}
engine := promql . NewEngine ( engineOpts )
2018-08-27 09:41:42 -07:00
var lastNotified [ ] * Alert
notifyFunc := func ( ctx context . Context , expr string , alerts ... * Alert ) {
lastNotified = alerts
}
opts := & ManagerOptions {
QueryFunc : EngineQueryFunc ( engine , storage ) ,
Appendable : storage ,
2020-06-26 11:06:36 -07:00
Queryable : storage ,
2018-08-27 09:41:42 -07:00
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
NotifyFunc : notifyFunc ,
ResendDelay : 2 * time . Second ,
}
2020-02-03 10:23:07 -08:00
expr , err := parser . ParseExpr ( "a > 1" )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2023-01-09 03:21:38 -08:00
rule := NewAlertingRule ( "aTooHigh" , expr , 0 , 0 , labels . Labels { } , labels . Labels { } , labels . EmptyLabels ( ) , "" , true , log . NewNopLogger ( ) )
2020-02-12 07:22:18 -08:00
group := NewGroup ( GroupOptions {
Name : "alert" ,
Interval : time . Second ,
Rules : [ ] Rule { rule } ,
ShouldRestore : true ,
Opts : opts ,
} )
2018-08-27 09:41:42 -07:00
2020-07-24 07:10:51 -07:00
app := storage . Appender ( context . Background ( ) )
2021-02-18 04:07:00 -08:00
app . Append ( 0 , labels . FromStrings ( model . MetricNameLabel , "a" ) , 1000 , 2 )
app . Append ( 0 , labels . FromStrings ( model . MetricNameLabel , "a" ) , 2000 , 3 )
app . Append ( 0 , labels . FromStrings ( model . MetricNameLabel , "a" ) , 5000 , 3 )
app . Append ( 0 , labels . FromStrings ( model . MetricNameLabel , "a" ) , 6000 , 0 )
2018-08-27 09:41:42 -07:00
err = app . Commit ( )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2018-08-27 09:41:42 -07:00
ctx := context . Background ( )
// Alert sent right away
group . Eval ( ctx , time . Unix ( 1 , 0 ) )
2023-12-07 03:35:01 -08:00
require . Len ( t , lastNotified , 1 )
2020-10-29 02:43:23 -07:00
require . NotZero ( t , lastNotified [ 0 ] . ValidUntil , "ValidUntil should not be zero" )
2018-08-27 09:41:42 -07:00
// Alert is not sent 1s later
group . Eval ( ctx , time . Unix ( 2 , 0 ) )
2023-12-07 03:35:01 -08:00
require . Empty ( t , lastNotified )
2018-08-27 09:41:42 -07:00
// Alert is resent at t=5s
group . Eval ( ctx , time . Unix ( 5 , 0 ) )
2023-12-07 03:35:01 -08:00
require . Len ( t , lastNotified , 1 )
2018-08-27 09:41:42 -07:00
// Resolution alert sent right away
group . Eval ( ctx , time . Unix ( 6 , 0 ) )
2023-12-07 03:35:01 -08:00
require . Len ( t , lastNotified , 1 )
2018-08-27 09:41:42 -07:00
}
2020-01-27 04:41:32 -08:00
func TestMetricsUpdate ( t * testing . T ) {
files := [ ] string { "fixtures/rules.yaml" , "fixtures/rules2.yaml" }
metricNames := [ ] string {
2020-04-08 14:21:37 -07:00
"prometheus_rule_evaluations_total" ,
"prometheus_rule_evaluation_failures_total" ,
2020-01-27 04:41:32 -08:00
"prometheus_rule_group_interval_seconds" ,
"prometheus_rule_group_last_duration_seconds" ,
"prometheus_rule_group_last_evaluation_timestamp_seconds" ,
"prometheus_rule_group_rules" ,
}
storage := teststorage . New ( t )
defer storage . Close ( )
2020-07-21 15:13:24 -07:00
registry := prometheus . NewRegistry ( )
2020-01-27 04:41:32 -08:00
opts := promql . EngineOpts {
2020-01-28 12:38:49 -08:00
Logger : nil ,
Reg : nil ,
MaxSamples : 10 ,
Timeout : 10 * time . Second ,
2020-01-27 04:41:32 -08:00
}
engine := promql . NewEngine ( opts )
ruleManager := NewManager ( & ManagerOptions {
Appendable : storage ,
2020-06-26 11:06:36 -07:00
Queryable : storage ,
2020-01-27 04:41:32 -08:00
QueryFunc : EngineQueryFunc ( engine , storage ) ,
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
Registerer : registry ,
} )
2020-07-21 15:13:24 -07:00
ruleManager . start ( )
2020-01-27 04:41:32 -08:00
defer ruleManager . Stop ( )
countMetrics := func ( ) int {
ms , err := registry . Gather ( )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2020-01-27 04:41:32 -08:00
var metrics int
for _ , m := range ms {
s := m . GetName ( )
for _ , n := range metricNames {
if s == n {
metrics += len ( m . Metric )
break
}
}
}
return metrics
}
cases := [ ] struct {
files [ ] string
metrics int
} {
{
files : files ,
2020-04-08 14:21:37 -07:00
metrics : 12 ,
2020-01-27 04:41:32 -08:00
} ,
{
files : files [ : 1 ] ,
2020-04-08 14:21:37 -07:00
metrics : 6 ,
2020-01-27 04:41:32 -08:00
} ,
{
files : files [ : 0 ] ,
metrics : 0 ,
} ,
{
files : files [ 1 : ] ,
2020-04-08 14:21:37 -07:00
metrics : 6 ,
2020-01-27 04:41:32 -08:00
} ,
}
for i , c := range cases {
2022-07-21 09:44:35 -07:00
err := ruleManager . Update ( time . Second , c . files , labels . EmptyLabels ( ) , "" , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2020-01-27 04:41:32 -08:00
time . Sleep ( 2 * time . Second )
2020-10-29 02:43:23 -07:00
require . Equal ( t , c . metrics , countMetrics ( ) , "test %d: invalid count of metrics" , i )
2020-01-27 04:41:32 -08:00
}
}
2020-02-12 07:22:18 -08:00
func TestGroupStalenessOnRemoval ( t * testing . T ) {
if testing . Short ( ) {
t . Skip ( "skipping test in short mode." )
}
files := [ ] string { "fixtures/rules2.yaml" }
sameFiles := [ ] string { "fixtures/rules2_copy.yaml" }
storage := teststorage . New ( t )
defer storage . Close ( )
opts := promql . EngineOpts {
Logger : nil ,
Reg : nil ,
MaxSamples : 10 ,
Timeout : 10 * time . Second ,
}
engine := promql . NewEngine ( opts )
ruleManager := NewManager ( & ManagerOptions {
Appendable : storage ,
2020-06-26 11:06:36 -07:00
Queryable : storage ,
2020-02-12 07:22:18 -08:00
QueryFunc : EngineQueryFunc ( engine , storage ) ,
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
} )
var stopped bool
2020-07-21 15:13:24 -07:00
ruleManager . start ( )
2020-02-12 07:22:18 -08:00
defer func ( ) {
if ! stopped {
ruleManager . Stop ( )
}
} ( )
cases := [ ] struct {
files [ ] string
staleNaN int
} {
{
files : files ,
staleNaN : 0 ,
} ,
{
// When we remove the files, it should produce a staleness marker.
files : files [ : 0 ] ,
staleNaN : 1 ,
} ,
{
// Rules that produce the same metrics but in a different file
// should not produce staleness marker.
files : sameFiles ,
staleNaN : 0 ,
} ,
{
// Staleness marker should be present as we don't have any rules
// loaded anymore.
files : files [ : 0 ] ,
staleNaN : 1 ,
} ,
{
// Add rules back so we have rules loaded when we stop the manager
// and check for the absence of staleness markers.
files : sameFiles ,
staleNaN : 0 ,
} ,
}
var totalStaleNaN int
for i , c := range cases {
2022-07-21 09:44:35 -07:00
err := ruleManager . Update ( time . Second , c . files , labels . EmptyLabels ( ) , "" , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2020-02-12 07:22:18 -08:00
time . Sleep ( 3 * time . Second )
totalStaleNaN += c . staleNaN
2020-10-29 02:43:23 -07:00
require . Equal ( t , totalStaleNaN , countStaleNaN ( t , storage ) , "test %d/%q: invalid count of staleness markers" , i , c . files )
2020-02-12 07:22:18 -08:00
}
ruleManager . Stop ( )
stopped = true
2020-10-29 02:43:23 -07:00
require . Equal ( t , totalStaleNaN , countStaleNaN ( t , storage ) , "invalid count of staleness markers after stopping the engine" )
2020-02-12 07:22:18 -08:00
}
func TestMetricsStalenessOnManagerShutdown ( t * testing . T ) {
if testing . Short ( ) {
t . Skip ( "skipping test in short mode." )
}
files := [ ] string { "fixtures/rules2.yaml" }
storage := teststorage . New ( t )
defer storage . Close ( )
opts := promql . EngineOpts {
Logger : nil ,
Reg : nil ,
MaxSamples : 10 ,
Timeout : 10 * time . Second ,
}
engine := promql . NewEngine ( opts )
ruleManager := NewManager ( & ManagerOptions {
Appendable : storage ,
2020-06-26 11:06:36 -07:00
Queryable : storage ,
2020-02-12 07:22:18 -08:00
QueryFunc : EngineQueryFunc ( engine , storage ) ,
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
} )
var stopped bool
2020-07-21 15:13:24 -07:00
ruleManager . start ( )
2020-02-12 07:22:18 -08:00
defer func ( ) {
if ! stopped {
ruleManager . Stop ( )
}
} ( )
2022-07-21 09:44:35 -07:00
err := ruleManager . Update ( 2 * time . Second , files , labels . EmptyLabels ( ) , "" , nil )
2020-02-12 07:22:18 -08:00
time . Sleep ( 4 * time . Second )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2020-02-12 07:22:18 -08:00
start := time . Now ( )
2022-07-21 09:44:35 -07:00
err = ruleManager . Update ( 3 * time . Second , files [ : 0 ] , labels . EmptyLabels ( ) , "" , nil )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2020-02-12 07:22:18 -08:00
ruleManager . Stop ( )
stopped = true
2023-12-07 03:35:01 -08:00
require . Less ( t , time . Since ( start ) , 1 * time . Second , "rule manager does not stop early" )
2020-02-12 07:22:18 -08:00
time . Sleep ( 5 * time . Second )
2020-10-29 02:43:23 -07:00
require . Equal ( t , 0 , countStaleNaN ( t , storage ) , "invalid count of staleness markers after stopping the engine" )
2020-02-12 07:22:18 -08:00
}
2020-03-12 02:36:09 -07:00
func countStaleNaN ( t * testing . T , st storage . Storage ) int {
2020-02-12 07:22:18 -08:00
var c int
2023-09-12 03:37:38 -07:00
querier , err := st . Querier ( 0 , time . Now ( ) . Unix ( ) * 1000 )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2020-02-12 07:22:18 -08:00
defer querier . Close ( )
matcher , err := labels . NewMatcher ( labels . MatchEqual , model . MetricNameLabel , "test_2" )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2020-02-12 07:22:18 -08:00
2023-09-12 03:37:38 -07:00
set := querier . Select ( context . Background ( ) , false , nil , matcher )
2020-02-12 07:22:18 -08:00
samples , err := readSeriesSet ( set )
2020-10-29 02:43:23 -07:00
require . NoError ( t , err )
2020-02-12 07:22:18 -08:00
metric := labels . FromStrings ( model . MetricNameLabel , "test_2" ) . String ( )
metricSample , ok := samples [ metric ]
2020-10-29 02:43:23 -07:00
require . True ( t , ok , "Series %s not returned." , metric )
2020-02-12 07:22:18 -08:00
for _ , s := range metricSample {
promql: Separate `Point` into `FPoint` and `HPoint`
In other words: Instead of having a “polymorphous” `Point` that can
either contain a float value or a histogram value, use an `FPoint` for
floats and an `HPoint` for histograms.
This seemingly small change has a _lot_ of repercussions throughout
the codebase.
The idea here is to avoid the increase in size of `Point` arrays that
happened after native histograms had been added.
The higher-level data structures (`Sample`, `Series`, etc.) are still
“polymorphous”. The same idea could be applied to them, but at each
step the trade-offs needed to be evaluated.
The idea with this change is to do the minimum necessary to get back
to pre-histogram performance for functions that do not touch
histograms. Here are comparisons for the `changes` function. The test
data doesn't include histograms yet. Ideally, there would be no change
in the benchmark result at all.
First runtime v2.39 compared to directly prior to this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10)
```
And then runtime v2.39 compared to after this commit:
```
name old time/op new time/op delta
RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8)
RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8)
RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10)
RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9)
RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10)
RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10)
RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9)
```
In summary, the runtime doesn't really improve with this change for
queries with just a few steps. For queries with many steps, this
commit essentially reinstates the old performance. This is good
because the many-step queries are the one that matter most (longest
absolute runtime).
In terms of allocations, though, this commit doesn't make a dent at
all (numbers not shown). The reason is that most of the allocations
happen in the sampleRingIterator (in the storage package), which has
to be addressed in a separate commit.
Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
if value . IsStaleNaN ( s . F ) {
2020-02-12 07:22:18 -08:00
c ++
}
}
return c
}
2020-07-06 02:35:16 -07:00
func TestGroupHasAlertingRules ( t * testing . T ) {
tests := [ ] struct {
group * Group
want bool
} {
{
group : & Group {
name : "HasAlertingRule" ,
rules : [ ] Rule {
2023-01-09 03:21:38 -08:00
NewAlertingRule ( "alert" , nil , 0 , 0 , labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , labels . EmptyLabels ( ) , "" , true , nil ) ,
2022-07-21 09:44:35 -07:00
NewRecordingRule ( "record" , nil , labels . EmptyLabels ( ) ) ,
2020-07-06 02:35:16 -07:00
} ,
} ,
want : true ,
} ,
{
group : & Group {
name : "HasNoRule" ,
rules : [ ] Rule { } ,
} ,
want : false ,
} ,
{
group : & Group {
name : "HasOnlyRecordingRule" ,
rules : [ ] Rule {
2022-07-21 09:44:35 -07:00
NewRecordingRule ( "record" , nil , labels . EmptyLabels ( ) ) ,
2020-07-06 02:35:16 -07:00
} ,
} ,
want : false ,
} ,
}
for i , test := range tests {
got := test . group . HasAlertingRules ( )
2020-10-29 02:43:23 -07:00
require . Equal ( t , test . want , got , "test case %d failed, expected:%t got:%t" , i , test . want , got )
2020-07-06 02:35:16 -07:00
}
}
2021-03-18 07:44:33 -07:00
// TestRuleHealthUpdates follows the health and last error of a recording
// rule ("a + 1") through its life: unknown before any evaluation, good after
// two in-order evaluations, and bad with an out-of-order-sample error after
// the rule is evaluated again at an earlier timestamp.
func TestRuleHealthUpdates(t *testing.T) {
	st := teststorage.New(t)
	defer st.Close()
	engineOpts := promql.EngineOpts{
		Logger:     nil,
		Reg:        nil,
		MaxSamples: 10,
		Timeout:    10 * time.Second,
	}
	engine := promql.NewEngine(engineOpts)
	opts := &ManagerOptions{
		QueryFunc:  EngineQueryFunc(engine, st),
		Appendable: st,
		Queryable:  st,
		Context:    context.Background(),
		Logger:     log.NewNopLogger(),
	}

	expr, err := parser.ParseExpr("a + 1")
	require.NoError(t, err)
	rule := NewRecordingRule("a_plus_one", expr, labels.Labels{})
	group := NewGroup(GroupOptions{
		Name:          "default",
		Interval:      time.Second,
		Rules:         []Rule{rule},
		ShouldRestore: true,
		Opts:          opts,
	})

	// A time series that has two samples. Append errors are checked so that a
	// broken fixture is reported here rather than as a confusing health
	// mismatch further down.
	app := st.Appender(context.Background())
	_, err = app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 0, 1)
	require.NoError(t, err)
	_, err = app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 1000, 2)
	require.NoError(t, err)
	err = app.Commit()
	require.NoError(t, err)

	ctx := context.Background()

	rules := group.Rules()[0]
	require.NoError(t, rules.LastError())
	require.Equal(t, HealthUnknown, rules.Health())

	// Execute 2 times, it should be all green.
	group.Eval(ctx, time.Unix(0, 0))
	group.Eval(ctx, time.Unix(1, 0))

	rules = group.Rules()[0]
	require.NoError(t, rules.LastError())
	require.Equal(t, HealthGood, rules.Health())

	// Now execute the rule in the past again, this should cause append failures.
	group.Eval(ctx, time.Unix(0, 0))
	rules = group.Rules()[0]
	require.EqualError(t, rules.LastError(), storage.ErrOutOfOrderSample.Error())
	require.Equal(t, HealthBad, rules.Health())
}
2022-03-28 17:16:46 -07:00
2023-04-04 11:21:13 -07:00
func TestRuleGroupEvalIterationFunc ( t * testing . T ) {
2023-08-18 11:48:59 -07:00
storage := promql . LoadedStorage ( t , `
2022-03-28 17:16:46 -07:00
load 5 m
http_requests { instance = "0" } 75 85 50 0 0 25 0 0 40 0 120
` )
2023-08-18 11:48:59 -07:00
t . Cleanup ( func ( ) { storage . Close ( ) } )
2022-03-28 17:16:46 -07:00
expr , err := parser . ParseExpr ( ` http_requests { group="canary", job="app-server"} < 100 ` )
require . NoError ( t , err )
testValue := 1
2023-04-04 11:21:13 -07:00
evalIterationFunc := func ( ctx context . Context , g * Group , evalTimestamp time . Time ) {
2022-03-28 17:16:46 -07:00
testValue = 2
2023-04-04 11:21:13 -07:00
DefaultEvalIterationFunc ( ctx , g , evalTimestamp )
testValue = 3
}
skipEvalIterationFunc := func ( ctx context . Context , g * Group , evalTimestamp time . Time ) {
testValue = 4
2022-03-28 17:16:46 -07:00
}
type testInput struct {
2023-04-04 11:21:13 -07:00
evalIterationFunc GroupEvalIterationFunc
expectedValue int
lastEvalTimestampIsZero bool
2022-03-28 17:16:46 -07:00
}
tests := [ ] testInput {
2023-04-04 11:21:13 -07:00
// testValue should still have value of 1 since the default iteration function will be called.
2022-03-28 17:16:46 -07:00
{
2023-04-04 11:21:13 -07:00
evalIterationFunc : nil ,
expectedValue : 1 ,
lastEvalTimestampIsZero : false ,
2022-03-28 17:16:46 -07:00
} ,
2023-04-04 11:21:13 -07:00
// testValue should be incremented to 3 since evalIterationFunc is called.
2022-03-28 17:16:46 -07:00
{
2023-04-04 11:21:13 -07:00
evalIterationFunc : evalIterationFunc ,
expectedValue : 3 ,
lastEvalTimestampIsZero : false ,
} ,
// testValue should be incremented to 4 since skipEvalIterationFunc is called.
{
evalIterationFunc : skipEvalIterationFunc ,
expectedValue : 4 ,
lastEvalTimestampIsZero : true ,
2022-03-28 17:16:46 -07:00
} ,
}
testFunc := func ( tst testInput ) {
opts := & ManagerOptions {
2023-08-18 11:48:59 -07:00
QueryFunc : EngineQueryFunc ( testEngine , storage ) ,
Appendable : storage ,
Queryable : storage ,
2022-03-28 17:16:46 -07:00
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
NotifyFunc : func ( ctx context . Context , expr string , alerts ... * Alert ) { } ,
OutageTolerance : 30 * time . Minute ,
ForGracePeriod : 10 * time . Minute ,
}
activeAlert := & Alert {
State : StateFiring ,
ActiveAt : time . Now ( ) ,
}
m := map [ uint64 ] * Alert { }
m [ 1 ] = activeAlert
rule := & AlertingRule {
2022-07-19 03:58:37 -07:00
name : "HTTPRequestRateLow" ,
vector : expr ,
holdDuration : 5 * time . Minute ,
labels : labels . FromStrings ( "severity" , "critical" ) ,
2022-07-21 09:44:35 -07:00
annotations : labels . EmptyLabels ( ) ,
2022-07-19 03:58:37 -07:00
externalLabels : nil ,
externalURL : "" ,
active : m ,
logger : nil ,
restored : atomic . NewBool ( true ) ,
health : atomic . NewString ( string ( HealthUnknown ) ) ,
evaluationTimestamp : atomic . NewTime ( time . Time { } ) ,
evaluationDuration : atomic . NewDuration ( 0 ) ,
lastError : atomic . NewError ( nil ) ,
2022-03-28 17:16:46 -07:00
}
group := NewGroup ( GroupOptions {
2023-04-04 11:21:13 -07:00
Name : "default" ,
Interval : time . Second ,
Rules : [ ] Rule { rule } ,
ShouldRestore : true ,
Opts : opts ,
EvalIterationFunc : tst . evalIterationFunc ,
2022-03-28 17:16:46 -07:00
} )
go func ( ) {
group . run ( opts . Context )
} ( )
time . Sleep ( 3 * time . Second )
group . stop ( )
2023-04-04 11:21:13 -07:00
2022-03-28 17:16:46 -07:00
require . Equal ( t , tst . expectedValue , testValue )
2023-04-04 11:21:13 -07:00
if tst . lastEvalTimestampIsZero {
require . Zero ( t , group . GetLastEvalTimestamp ( ) )
} else {
oneMinute , _ := time . ParseDuration ( "1m" )
require . WithinDuration ( t , time . Now ( ) , group . GetLastEvalTimestamp ( ) , oneMinute )
}
2022-03-28 17:16:46 -07:00
}
2023-04-04 11:21:13 -07:00
for i , tst := range tests {
t . Logf ( "case %d" , i )
2022-03-28 17:16:46 -07:00
testFunc ( tst )
}
}
2023-01-11 04:48:18 -08:00
func TestNativeHistogramsInRecordingRules ( t * testing . T ) {
2023-08-18 11:48:59 -07:00
storage := teststorage . New ( t )
t . Cleanup ( func ( ) { storage . Close ( ) } )
2023-01-11 04:48:18 -08:00
// Add some histograms.
2023-08-18 11:48:59 -07:00
db := storage . DB
2023-02-10 03:39:33 -08:00
hists := tsdbutil . GenerateTestHistograms ( 5 )
2023-01-11 04:48:18 -08:00
ts := time . Now ( )
app := db . Appender ( context . Background ( ) )
for i , h := range hists {
l := labels . FromStrings ( "__name__" , "histogram_metric" , "idx" , fmt . Sprintf ( "%d" , i ) )
_ , err := app . AppendHistogram ( 0 , l , ts . UnixMilli ( ) , h . Copy ( ) , nil )
require . NoError ( t , err )
}
require . NoError ( t , app . Commit ( ) )
opts := & ManagerOptions {
2023-08-18 11:48:59 -07:00
QueryFunc : EngineQueryFunc ( testEngine , storage ) ,
Appendable : storage ,
Queryable : storage ,
2023-01-11 04:48:18 -08:00
Context : context . Background ( ) ,
Logger : log . NewNopLogger ( ) ,
}
expr , err := parser . ParseExpr ( "sum(histogram_metric)" )
require . NoError ( t , err )
rule := NewRecordingRule ( "sum:histogram_metric" , expr , labels . Labels { } )
group := NewGroup ( GroupOptions {
Name : "default" ,
Interval : time . Hour ,
Rules : [ ] Rule { rule } ,
ShouldRestore : true ,
Opts : opts ,
} )
group . Eval ( context . Background ( ) , ts . Add ( 10 * time . Second ) )
2023-09-12 03:37:38 -07:00
q , err := db . Querier ( ts . UnixMilli ( ) , ts . Add ( 20 * time . Second ) . UnixMilli ( ) )
2023-01-11 04:48:18 -08:00
require . NoError ( t , err )
2023-09-12 03:37:38 -07:00
ss := q . Select ( context . Background ( ) , false , nil , labels . MustNewMatcher ( labels . MatchEqual , "__name__" , "sum:histogram_metric" ) )
2023-01-11 04:48:18 -08:00
require . True ( t , ss . Next ( ) )
s := ss . At ( )
require . False ( t , ss . Next ( ) )
require . Equal ( t , labels . FromStrings ( "__name__" , "sum:histogram_metric" ) , s . Labels ( ) )
expHist := hists [ 0 ] . ToFloat ( )
for _ , h := range hists [ 1 : ] {
expHist = expHist . Add ( h . ToFloat ( ) )
}
it := s . Iterator ( nil )
require . Equal ( t , chunkenc . ValFloatHistogram , it . Next ( ) )
tsp , fh := it . AtFloatHistogram ( )
require . Equal ( t , ts . Add ( 10 * time . Second ) . UnixMilli ( ) , tsp )
require . Equal ( t , expHist , fh )
require . Equal ( t , chunkenc . ValNone , it . Next ( ) )
}