prometheus/promql/engine_test.go

3658 lines
106 KiB
Go
Raw Normal View History

// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package promql_test
import (
"context"
"errors"
"fmt"
fix!: stop unbounded memory usage from query log Resolves: #15433 When I converted prometheus to use slog in #14906, I update both the `QueryLogger` interface, as well as how the log calls to the `QueryLogger` were built up in `promql.Engine.exec()`. The backing logger for the `QueryLogger` in the engine is a `util/logging.JSONFileLogger`, and it's implementation of the `With()` method updates the logger the logger in place with the new keyvals added onto the underlying slog.Logger, which means they get inherited onto everything after. All subsequent calls to `With()`, even in later queries, would continue to then append on more and more keyvals for the various params and fields built up in the logger. In turn, this causes unbounded growth of the logger, leading to increased memory usage, and in at least one report was the likely cause of an OOM kill. More information can be found in the issue and the linked slack thread. This commit does a few things: - It was referenced in feedback in #14906 that it would've been better to not change the `QueryLogger` interface if possible, this PR proposes changes that bring it closer to alignment with the pre-3.0 `QueryLogger` interface contract - reverts `promql.Engine.exec()`'s usage of the query logger to the pattern of building up an array of args to pass at once to the end log call. Avoiding the repetitious calls to `.With()` are what resolve the issue with the logger growth/memory usage. - updates the scrape failure logger to use the update `QueryLogger` methods in the contract. - updates tests accordingly - cleans up unused methods Builds and passes tests successfully. Tested locally and confirmed I could no longer reproduce the issue/it resolved the issue. Signed-off-by: TJ Hoplock <t.hoplock@gmail.com>
2024-11-23 11:20:37 -08:00
"log/slog"
"math"
Don't sort postings if we only have one block. Sorting the heads postings can be quite slow. We only need sorted series when merging with another querier, so only sort then. This will make big queries that only touch the head faster, though queries that touch both the head and a block will still be the same speed. This probably won't help much with graphing unless the range is under an hour, however it should make most recording rules faster. Add gaurantee that remote read streaming produces sorted series. PromQL benchmarks for histograms show only 2-3% improvement, but they're only over 1k series. benchmark old ns/op new ns/op delta BenchmarkQuerierSelect/Head/1of1000000-4 1375486282 507657736 -63.09% BenchmarkQuerierSelect/Head/10of1000000-4 1387859004 507769850 -63.41% BenchmarkQuerierSelect/Head/100of1000000-4 1387087935 506029110 -63.52% BenchmarkQuerierSelect/Head/1000of1000000-4 1386869064 504521986 -63.62% BenchmarkQuerierSelect/Head/10000of1000000-4 1386213685 505210422 -63.55% BenchmarkQuerierSelect/Head/100000of1000000-4 1392754988 529842406 -61.96% BenchmarkQuerierSelect/Head/1000000of1000000-4 1569414722 725059506 -53.80% BenchmarkQuerierSelect/SortedHead/1of1000000-4 1381019902 1370495863 -0.76% BenchmarkQuerierSelect/SortedHead/10of1000000-4 1375696209 1366789468 -0.65% BenchmarkQuerierSelect/SortedHead/100of1000000-4 1386009422 1364519297 -1.55% BenchmarkQuerierSelect/SortedHead/1000of1000000-4 1377700532 1364486191 -0.96% BenchmarkQuerierSelect/SortedHead/10000of1000000-4 1383539536 1369545314 -1.01% BenchmarkQuerierSelect/SortedHead/100000of1000000-4 1410089163 1394731339 -1.09% BenchmarkQuerierSelect/SortedHead/1000000of1000000-4 1634744148 1581554956 -3.25% BenchmarkQuerierSelect/Block/1of1000000-4 881741242 879839470 -0.22% BenchmarkQuerierSelect/Block/10of1000000-4 880381562 882846038 +0.28% BenchmarkQuerierSelect/Block/100of1000000-4 887519357 881016916 -0.73% BenchmarkQuerierSelect/Block/1000of1000000-4 902194205 883433524 -2.08% BenchmarkQuerierSelect/Block/10000of1000000-4 892321964 885130170 -0.81% BenchmarkQuerierSelect/Block/100000of1000000-4 938604466 933527150 -0.54% BenchmarkQuerierSelect/Block/1000000of1000000-4 1313510845 1295881124 -1.34% benchmark old allocs new allocs delta BenchmarkQuerierSelect/Head/1of1000000-4 4000056 4000018 -0.00% BenchmarkQuerierSelect/Head/10of1000000-4 4000074 4000036 -0.00% BenchmarkQuerierSelect/Head/100of1000000-4 4000254 4000216 -0.00% BenchmarkQuerierSelect/Head/1000of1000000-4 4002054 4002016 -0.00% BenchmarkQuerierSelect/Head/10000of1000000-4 4020054 4020016 -0.00% BenchmarkQuerierSelect/Head/100000of1000000-4 4200054 4200016 -0.00% BenchmarkQuerierSelect/Head/1000000of1000000-4 6000054 6000016 -0.00% BenchmarkQuerierSelect/SortedHead/1of1000000-4 4000071 4000071 +0.00% BenchmarkQuerierSelect/SortedHead/10of1000000-4 4000089 4000089 +0.00% BenchmarkQuerierSelect/SortedHead/100of1000000-4 4000269 4000269 +0.00% BenchmarkQuerierSelect/SortedHead/1000of1000000-4 4002069 4002069 +0.00% BenchmarkQuerierSelect/SortedHead/10000of1000000-4 4020069 4020069 +0.00% BenchmarkQuerierSelect/SortedHead/100000of1000000-4 4200069 4200069 +0.00% BenchmarkQuerierSelect/SortedHead/1000000of1000000-4 6000069 6000069 +0.00% BenchmarkQuerierSelect/Block/1of1000000-4 6000023 6000022 -0.00% BenchmarkQuerierSelect/Block/10of1000000-4 6000059 6000058 -0.00% BenchmarkQuerierSelect/Block/100of1000000-4 6000419 6000418 -0.00% BenchmarkQuerierSelect/Block/1000of1000000-4 6004019 6004018 -0.00% BenchmarkQuerierSelect/Block/10000of1000000-4 6040019 6040018 -0.00% BenchmarkQuerierSelect/Block/100000of1000000-4 6400019 6400018 -0.00% BenchmarkQuerierSelect/Block/1000000of1000000-4 10000020 10000019 -0.00% benchmark old bytes new bytes delta BenchmarkQuerierSelect/Head/1of1000000-4 229192200 176001176 -23.21% BenchmarkQuerierSelect/Head/10of1000000-4 229193352 176002328 -23.21% BenchmarkQuerierSelect/Head/100of1000000-4 229204872 176013848 -23.21% BenchmarkQuerierSelect/Head/1000of1000000-4 229320072 176129048 -23.20% BenchmarkQuerierSelect/Head/10000of1000000-4 230472072 177281048 -23.08% BenchmarkQuerierSelect/Head/100000of1000000-4 241992072 188801048 -21.98% BenchmarkQuerierSelect/Head/1000000of1000000-4 357192072 304001048 -14.89% BenchmarkQuerierSelect/SortedHead/1of1000000-4 229193928 229193928 +0.00% BenchmarkQuerierSelect/SortedHead/10of1000000-4 229195080 229195080 +0.00% BenchmarkQuerierSelect/SortedHead/100of1000000-4 229206600 229206600 +0.00% BenchmarkQuerierSelect/SortedHead/1000of1000000-4 229321800 229321800 +0.00% BenchmarkQuerierSelect/SortedHead/10000of1000000-4 230473800 230473800 +0.00% BenchmarkQuerierSelect/SortedHead/100000of1000000-4 241993800 241993800 +0.00% BenchmarkQuerierSelect/SortedHead/1000000of1000000-4 357193800 357193800 +0.00% BenchmarkQuerierSelect/Block/1of1000000-4 227201516 227201500 -0.00% BenchmarkQuerierSelect/Block/10of1000000-4 227202924 227202908 -0.00% BenchmarkQuerierSelect/Block/100of1000000-4 227217036 227217020 -0.00% BenchmarkQuerierSelect/Block/1000of1000000-4 227358156 227358140 -0.00% BenchmarkQuerierSelect/Block/10000of1000000-4 228769356 228769340 -0.00% BenchmarkQuerierSelect/Block/100000of1000000-4 242881356 242881340 -0.00% BenchmarkQuerierSelect/Block/1000000of1000000-4 384001616 384001600 -0.00% Signed-off-by: Brian Brazil <brian.brazil@robustperception.io>
2020-01-17 03:21:44 -08:00
"sort"
"strings"
"sync"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/timestamp"
"github.com/prometheus/prometheus/model/value"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/promql/parser/posrange"
"github.com/prometheus/prometheus/promql/promqltest"
2018-02-15 04:08:00 -08:00
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/util/annotations"
"github.com/prometheus/prometheus/util/stats"
"github.com/prometheus/prometheus/util/testutil"
)
const (
env = "query execution"
defaultLookbackDelta = 5 * time.Minute
defaultEpsilon = 0.000001 // Relative error allowed for sample values.
)
func TestMain(m *testing.M) {
feat: add limitk() and limit_ratio() operators (#12503) * rebase 2024-07-01, picks previous renaming to `limitk()` and `limit_ratio()` Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * gofumpt -d -extra Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * more lint fixes Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * more lint fixes+ Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * put limitk() and limit_ratio() behind --enable-feature=promql-experimental-functions Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * EnableExperimentalFunctions for TestConcurrentRangeQueries() also Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * use testutil.RequireEqual to fix tests, WIP equivalent thingie for require.Contains Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * lint fix Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * moar linting Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * rebase 2024-06-19 Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * re-add limit(2, metric) testing for N=2 common series subset Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * move `ratio = param` to default switch case, for better readability Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * gofumpt -d -extra util/testutil/cmp.go Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * early break when reaching k elems in limitk(), should have always been so (!) Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * small typo fix Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * no-change small break-loop rearrange for readability Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * remove IsNan(ratio) condition in switch-case, already handled as input validation Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * no-change adding some comments Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * no-change simplify fullMatrix() helper functions used for tests Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * add `limitk(-1, metric)` testcase, which is handled as any k < 1 case Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * engine_test.go: no-change create `requireCommonSeries() helper func (moving code into it) for readability Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * rebase 2024-06-21 Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * engine_test.go: HAPPY NOW about its code -> reorg, create and use simpleRangeQuery() function, less lines and more readable ftW \o/ Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * move limitk(), limit_ratio() testing to promql/promqltest/testdata/limit.test Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * remove stale leftover after moving tests from engine_test.go to testdata/ Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * fix flaky `limit_ratio(0.5, ...)` test case Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * Update promql/engine.go Co-authored-by: Julius Volz <julius.volz@gmail.com> Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * Update promql/engine.go Co-authored-by: Julius Volz <julius.volz@gmail.com> Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * Update promql/engine.go Co-authored-by: Julius Volz <julius.volz@gmail.com> Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * fix AddRatioSample() implementation to use a single conditional (instead of switch/case + fallback return) Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * docs/querying/operators.md: document r < 0 Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * add negative limit_ratio() example to docs/querying/examples.md Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * move more extensive docu examples to docs/querying/operators.md Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * typo Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * small docu fix for poor-mans-normality-check, add it to limit.test ;) Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * limit.test: expand "Poor man's normality check" to whole eval range Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * restore mistakenly removed existing small comment Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * expand poors-man-normality-check case(s) Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * Revert "expand poors-man-normality-check case(s)" This reverts commit f69e1603b2ebe69c0a100197cfbcf6f81644b564, indeed too flaky 0:) Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * remove humor from docs/querying/operators.md Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * fix signoff Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * add web/ui missing changes Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * expand limit_ratio test cases, cross-fingering they'll not be flaky Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * remove flaky test Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * add missing warnings.Merge(ws) in instant-query return shortcut Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * add missing LimitK||LimitRatio case to codemirror-promql/src/parser/parser.ts Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * fix ui-lint Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> * actually fix returned warnings :] Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> --------- Signed-off-by: JuanJo Ciarlante <juanjosec@gmail.com> Co-authored-by: Julius Volz <julius.volz@gmail.com>
2024-07-03 13:18:57 -07:00
// Enable experimental functions testing
parser.EnableExperimentalFunctions = true
testutil.TolerantVerifyLeak(m)
}
func TestQueryConcurrency(t *testing.T) {
maxConcurrency := 10
queryTracker := promql.NewActiveQueryTracker(t.TempDir(), maxConcurrency, nil)
opts := promql.EngineOpts{
Logger: nil,
Reg: nil,
MaxSamples: 10,
Timeout: 100 * time.Second,
ActiveQueryTracker: queryTracker,
}
engine := promqltest.NewTestEngineWithOpts(t, opts)
ctx, cancelCtx := context.WithCancel(context.Background())
t.Cleanup(cancelCtx)
block := make(chan struct{})
processing := make(chan struct{})
done := make(chan int)
t.Cleanup(func() {
close(done)
})
f := func(context.Context) error {
select {
case processing <- struct{}{}:
case <-done:
}
select {
case <-block:
case <-done:
}
return nil
}
var wg sync.WaitGroup
for i := 0; i < maxConcurrency; i++ {
q := engine.NewTestQuery(f)
wg.Add(1)
go func() {
q.Exec(ctx)
wg.Done()
}()
select {
case <-processing:
// Expected.
case <-time.After(20 * time.Millisecond):
require.Fail(t, "Query within concurrency threshold not being executed")
}
}
q := engine.NewTestQuery(f)
wg.Add(1)
go func() {
q.Exec(ctx)
wg.Done()
}()
select {
case <-processing:
require.Fail(t, "Query above concurrency threshold being executed")
case <-time.After(20 * time.Millisecond):
// Expected.
}
// Terminate a running query.
block <- struct{}{}
select {
case <-processing:
// Expected.
case <-time.After(20 * time.Millisecond):
require.Fail(t, "Query within concurrency threshold not being executed")
}
// Terminate remaining queries.
for i := 0; i < maxConcurrency; i++ {
block <- struct{}{}
}
wg.Wait()
}
// contextDone returns an error if the context was canceled or timed out.
func contextDone(ctx context.Context, env string) error {
if err := ctx.Err(); err != nil {
switch {
case errors.Is(err, context.Canceled):
return promql.ErrQueryCanceled(env)
case errors.Is(err, context.DeadlineExceeded):
return promql.ErrQueryTimeout(env)
default:
return err
}
}
return nil
}
func TestQueryTimeout(t *testing.T) {
opts := promql.EngineOpts{
Logger: nil,
Reg: nil,
MaxSamples: 10,
Timeout: 5 * time.Millisecond,
}
engine := promqltest.NewTestEngineWithOpts(t, opts)
ctx, cancelCtx := context.WithCancel(context.Background())
defer cancelCtx()
query := engine.NewTestQuery(func(ctx context.Context) error {
time.Sleep(100 * time.Millisecond)
return contextDone(ctx, "test statement execution")
})
res := query.Exec(ctx)
require.Error(t, res.Err, "expected timeout error but got none")
var e promql.ErrQueryTimeout
require.ErrorAs(t, res.Err, &e, "expected timeout error but got: %s", res.Err)
}
const errQueryCanceled = promql.ErrQueryCanceled("test statement execution")
func TestQueryCancel(t *testing.T) {
opts := promql.EngineOpts{
Logger: nil,
Reg: nil,
MaxSamples: 10,
Timeout: 10 * time.Second,
}
engine := promqltest.NewTestEngineWithOpts(t, opts)
ctx, cancelCtx := context.WithCancel(context.Background())
defer cancelCtx()
// Cancel a running query before it completes.
block := make(chan struct{})
processing := make(chan struct{})
query1 := engine.NewTestQuery(func(ctx context.Context) error {
processing <- struct{}{}
<-block
return contextDone(ctx, "test statement execution")
})
var res *promql.Result
go func() {
res = query1.Exec(ctx)
processing <- struct{}{}
}()
<-processing
query1.Cancel()
block <- struct{}{}
<-processing
require.Error(t, res.Err, "expected cancellation error for query1 but got none")
require.Equal(t, errQueryCanceled, res.Err)
// Canceling a query before starting it must have no effect.
query2 := engine.NewTestQuery(func(ctx context.Context) error {
return contextDone(ctx, "test statement execution")
})
query2.Cancel()
res = query2.Exec(ctx)
require.NoError(t, res.Err)
}
2018-02-15 04:08:00 -08:00
// errQuerier implements storage.Querier which always returns error.
type errQuerier struct {
err error
}
func (q *errQuerier) Select(context.Context, bool, *storage.SelectHints, ...*labels.Matcher) storage.SeriesSet {
*: Consistent Error/Warning handling for SeriesSet iterator: Allowing Async Select (#7251) * Add errors and Warnings to SeriesSet Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Change Querier interface and refactor accordingly Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Refactor promql/engine to propagate warnings at eval stage Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Address review issues Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Make sure all the series from all Selects are pre-advanced Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Address review issues Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Separate merge series sets Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Clean Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Refactor merge querier failure handling Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Refactored and simplified fanout with improvements from incoming chunk iterator PRs. * Secondary logic is hidden, instead of weird failed series set logic we had. * Fanout is well commented * Fanout closing record all errors * MergeQuerier improved API (clearer) * deferredGenericMergeSeriesSet is not needed as we return no samples anyway for failed series sets (next = false). Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Fix formatting Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Fix CI issues Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Added final tests for error handling. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Addressed Brian's comments. * Moved hints in populate to be allocated only when needed. * Used sync.Once in secondary Querier to achieve all-or-nothing partial response logic. * Select after first Next is done will panic. NOTE: in lazySeriesSet in theory we could just panic, I think however we can totally just return error, it will panic in expand anyway. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Utilize errWithWarnings Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Fix recently introduced expansion issue Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Add tests for secondary querier error handling Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Implement lazy merge Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Add name to test cases Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Reorganize Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Address review comments Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Address review comments Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Remove redundant warnings Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> * Fix rebase mistake Signed-off-by: Kemal Akkoyun <kakkoyun@gmail.com> Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
2020-06-09 09:57:31 -07:00
return errSeriesSet{err: q.err}
2018-02-15 04:08:00 -08:00
}
func (*errQuerier) LabelValues(context.Context, string, *storage.LabelHints, ...*labels.Matcher) ([]string, annotations.Annotations, error) {
Add matchers to LabelValues() call (#8400) * Accept matchers in querier LabelValues() Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * create matcher to only select metrics which have searched label Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * test case for merge querier with matchers Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * test LabelValues with matchers on head Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * add test for LabelValues on block Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * formatting fix Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * Add comments Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * add missing lock release Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * remove unused parameter Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * Benchmarks for LabelValues() methods on block/head Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * Better comment Co-authored-by: Julien Pivotto <roidelapluie@gmail.com> Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * update comment Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * minor refactor make code cleaner Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * better comments Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * fix expected errors in test Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * Deleting parameter which can only be empty Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * fix comments Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * remove unnecessary lock Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * only lookup label value if label name was looked up Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * Return error when there is one Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * Call .Get() on decoder before checking errors Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * only lock head.symMtx when necessary Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * remove unnecessary delete() Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * re-use code instead of duplicating it Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * Consistently return error from LabelValueFor() Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * move helper func from util.go to querier.go Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * Fix test expectation Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> * ensure result de-duplication and sorting works Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> * return named error from LabelValueFor() Signed-off-by: Mauro Stettler <mauro.stettler@gmail.com> Co-authored-by: Julien Pivotto <roidelapluie@gmail.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com>
2021-02-09 09:38:35 -08:00
return nil, nil, nil
}
func (*errQuerier) LabelNames(context.Context, *storage.LabelHints, ...*labels.Matcher) ([]string, annotations.Annotations, error) {
LabelNames API with matchers (#9083) * Push the matchers for LabelNames all the way into the index. NB This doesn't actually implement it in the index, just plumbs it through for now... Signed-off-by: Tom Wilkie <tom@grafana.com> * Hack it up. Does not work. Signed-off-by: Tom Wilkie <tom@grafana.com> * Revert changes I don't understand Can't see why do we need to hold a mutex on symbols, and the purpose of the LabelNamesFor method. Maybe I'll need to re-add this later. Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> * Implement LabelNamesFor This method provides the label names that appear in the postings provided. We do that deeper than the label values because we know beforehand that most of the label names we'll be the same across different postings, and we don't want to go down an up looking up the same symbols for all different series. Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> * Mutex on symbols should be unlocked However, I still don't understand why do we need a mutex here. Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> * Fix head.LabelNamesFor Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> * Implement mockIndex LabelNames with matchers Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> * Nitpick on slice initialisation Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> * Add tests for LabelNamesWithMatchers Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> * Fix the mutex mess on head.LabelValues/LabelNames I still don't see why we need to grab that unrelated mutex, but at least now we're grabbing it consistently Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> * Check error after iterating postings Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> * Use the error from posting when there was en error in postings Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> * Update storage/interface.go comment Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> * Update tsdb/index/index.go comment Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> * Update tsdb/index/index.go wrapped error msg Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> * Update tsdb/index/index.go wrapped error msg Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> * Update tsdb/index/index.go warpped error msg Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> * Remove unneeded comment Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> * Add testcases for LabelNames w/matchers in api.go Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> * Use t.Cleanup() instead of defer in tests Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com> Co-authored-by: Tom Wilkie <tom@grafana.com> Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com>
2021-07-20 05:38:08 -07:00
return nil, nil, nil
}
func (*errQuerier) Close() error { return nil }
2018-02-15 04:08:00 -08:00
// errSeriesSet implements storage.SeriesSet which always returns error.
type errSeriesSet struct {
err error
}
func (errSeriesSet) Next() bool { return false }
func (errSeriesSet) At() storage.Series { return nil }
func (e errSeriesSet) Err() error { return e.err }
func (e errSeriesSet) Warnings() annotations.Annotations { return nil }
2018-02-15 04:08:00 -08:00
func TestQueryError(t *testing.T) {
opts := promql.EngineOpts{
Logger: nil,
Reg: nil,
MaxSamples: 10,
Timeout: 10 * time.Second,
}
engine := promqltest.NewTestEngineWithOpts(t, opts)
errStorage := promql.ErrStorage{errors.New("storage error")}
queryable := storage.QueryableFunc(func(mint, maxt int64) (storage.Querier, error) {
2018-02-15 04:08:00 -08:00
return &errQuerier{err: errStorage}, nil
})
ctx, cancelCtx := context.WithCancel(context.Background())
defer cancelCtx()
vectorQuery, err := engine.NewInstantQuery(ctx, queryable, nil, "foo", time.Unix(1, 0))
require.NoError(t, err)
2018-02-15 04:08:00 -08:00
res := vectorQuery.Exec(ctx)
require.Error(t, res.Err, "expected error on failed select but got none")
require.ErrorIs(t, res.Err, errStorage, "expected error doesn't match")
2018-02-15 04:08:00 -08:00
matrixQuery, err := engine.NewInstantQuery(ctx, queryable, nil, "foo[1m]", time.Unix(1, 0))
require.NoError(t, err)
2018-02-15 04:08:00 -08:00
res = matrixQuery.Exec(ctx)
require.Error(t, res.Err, "expected error on failed select but got none")
require.ErrorIs(t, res.Err, errStorage, "expected error doesn't match")
2018-02-15 04:08:00 -08:00
}
type noopHintRecordingQueryable struct {
hints []*storage.SelectHints
}
func (h *noopHintRecordingQueryable) Querier(int64, int64) (storage.Querier, error) {
return &hintRecordingQuerier{Querier: &errQuerier{}, h: h}, nil
}
type hintRecordingQuerier struct {
storage.Querier
h *noopHintRecordingQueryable
}
func (h *hintRecordingQuerier) Select(ctx context.Context, sortSeries bool, hints *storage.SelectHints, matchers ...*labels.Matcher) storage.SeriesSet {
h.h.hints = append(h.h.hints, hints)
return h.Querier.Select(ctx, sortSeries, hints, matchers...)
}
func TestSelectHintsSetCorrectly(t *testing.T) {
opts := promql.EngineOpts{
Logger: nil,
Reg: nil,
MaxSamples: 10,
Timeout: 10 * time.Second,
LookbackDelta: 5 * time.Second,
EnableAtModifier: true,
}
for _, tc := range []struct {
query string
// All times are in milliseconds.
start int64
end int64
// TODO(bwplotka): Add support for better hints when subquerying.
expected []*storage.SelectHints
}{
{
query: "foo", start: 10000,
expected: []*storage.SelectHints{
{Start: 5001, End: 10000},
},
}, {
query: "foo @ 15", start: 10000,
expected: []*storage.SelectHints{
{Start: 10001, End: 15000},
},
}, {
query: "foo @ 1", start: 10000,
expected: []*storage.SelectHints{
{Start: -3999, End: 1000},
},
}, {
query: "foo[2m]", start: 200000,
expected: []*storage.SelectHints{
{Start: 80001, End: 200000, Range: 120000},
},
}, {
query: "foo[2m] @ 180", start: 200000,
expected: []*storage.SelectHints{
{Start: 60001, End: 180000, Range: 120000},
},
}, {
query: "foo[2m] @ 300", start: 200000,
expected: []*storage.SelectHints{
{Start: 180001, End: 300000, Range: 120000},
},
}, {
query: "foo[2m] @ 60", start: 200000,
expected: []*storage.SelectHints{
{Start: -59999, End: 60000, Range: 120000},
},
}, {
query: "foo[2m] offset 2m", start: 300000,
expected: []*storage.SelectHints{
{Start: 60001, End: 180000, Range: 120000},
},
}, {
query: "foo[2m] @ 200 offset 2m", start: 300000,
expected: []*storage.SelectHints{
{Start: -39999, End: 80000, Range: 120000},
},
}, {
query: "foo[2m:1s]", start: 300000,
expected: []*storage.SelectHints{
{Start: 175001, End: 300000, Step: 1000},
},
}, {
query: "count_over_time(foo[2m:1s])", start: 300000,
expected: []*storage.SelectHints{
{Start: 175001, End: 300000, Func: "count_over_time", Step: 1000},
},
}, {
query: "count_over_time(foo[2m:1s] @ 300)", start: 200000,
expected: []*storage.SelectHints{
{Start: 175001, End: 300000, Func: "count_over_time", Step: 1000},
},
}, {
query: "count_over_time(foo[2m:1s] @ 200)", start: 200000,
expected: []*storage.SelectHints{
{Start: 75001, End: 200000, Func: "count_over_time", Step: 1000},
},
}, {
query: "count_over_time(foo[2m:1s] @ 100)", start: 200000,
expected: []*storage.SelectHints{
{Start: -24999, End: 100000, Func: "count_over_time", Step: 1000},
},
}, {
query: "count_over_time(foo[2m:1s] offset 10s)", start: 300000,
expected: []*storage.SelectHints{
{Start: 165001, End: 290000, Func: "count_over_time", Step: 1000},
},
}, {
query: "count_over_time((foo offset 10s)[2m:1s] offset 10s)", start: 300000,
expected: []*storage.SelectHints{
{Start: 155001, End: 280000, Func: "count_over_time", Step: 1000},
},
}, {
// When the @ is on the vector selector, the enclosing subquery parameters
// don't affect the hint ranges.
query: "count_over_time((foo @ 200 offset 10s)[2m:1s] offset 10s)", start: 300000,
expected: []*storage.SelectHints{
{Start: 185001, End: 190000, Func: "count_over_time", Step: 1000},
},
}, {
// When the @ is on the vector selector, the enclosing subquery parameters
// don't affect the hint ranges.
query: "count_over_time((foo @ 200 offset 10s)[2m:1s] @ 100 offset 10s)", start: 300000,
expected: []*storage.SelectHints{
{Start: 185001, End: 190000, Func: "count_over_time", Step: 1000},
},
}, {
query: "count_over_time((foo offset 10s)[2m:1s] @ 100 offset 10s)", start: 300000,
expected: []*storage.SelectHints{
{Start: -44999, End: 80000, Func: "count_over_time", Step: 1000},
},
}, {
query: "foo", start: 10000, end: 20000,
expected: []*storage.SelectHints{
{Start: 5001, End: 20000, Step: 1000},
},
}, {
query: "foo @ 15", start: 10000, end: 20000,
expected: []*storage.SelectHints{
{Start: 10001, End: 15000, Step: 1000},
},
}, {
query: "foo @ 1", start: 10000, end: 20000,
expected: []*storage.SelectHints{
{Start: -3999, End: 1000, Step: 1000},
},
}, {
query: "rate(foo[2m] @ 180)", start: 200000, end: 500000,
expected: []*storage.SelectHints{
{Start: 60001, End: 180000, Range: 120000, Func: "rate", Step: 1000},
},
}, {
query: "rate(foo[2m] @ 300)", start: 200000, end: 500000,
expected: []*storage.SelectHints{
{Start: 180001, End: 300000, Range: 120000, Func: "rate", Step: 1000},
},
}, {
query: "rate(foo[2m] @ 60)", start: 200000, end: 500000,
expected: []*storage.SelectHints{
{Start: -59999, End: 60000, Range: 120000, Func: "rate", Step: 1000},
},
}, {
query: "rate(foo[2m])", start: 200000, end: 500000,
expected: []*storage.SelectHints{
{Start: 80001, End: 500000, Range: 120000, Func: "rate", Step: 1000},
},
}, {
query: "rate(foo[2m] offset 2m)", start: 300000, end: 500000,
expected: []*storage.SelectHints{
{Start: 60001, End: 380000, Range: 120000, Func: "rate", Step: 1000},
},
}, {
query: "rate(foo[2m:1s])", start: 300000, end: 500000,
expected: []*storage.SelectHints{
{Start: 175001, End: 500000, Func: "rate", Step: 1000},
},
}, {
query: "count_over_time(foo[2m:1s])", start: 300000, end: 500000,
expected: []*storage.SelectHints{
{Start: 175001, End: 500000, Func: "count_over_time", Step: 1000},
},
}, {
query: "count_over_time(foo[2m:1s] offset 10s)", start: 300000, end: 500000,
expected: []*storage.SelectHints{
{Start: 165001, End: 490000, Func: "count_over_time", Step: 1000},
},
}, {
query: "count_over_time(foo[2m:1s] @ 300)", start: 200000, end: 500000,
expected: []*storage.SelectHints{
{Start: 175001, End: 300000, Func: "count_over_time", Step: 1000},
},
}, {
query: "count_over_time(foo[2m:1s] @ 200)", start: 200000, end: 500000,
expected: []*storage.SelectHints{
{Start: 75001, End: 200000, Func: "count_over_time", Step: 1000},
},
}, {
query: "count_over_time(foo[2m:1s] @ 100)", start: 200000, end: 500000,
expected: []*storage.SelectHints{
{Start: -24999, End: 100000, Func: "count_over_time", Step: 1000},
},
}, {
query: "count_over_time((foo offset 10s)[2m:1s] offset 10s)", start: 300000, end: 500000,
expected: []*storage.SelectHints{
{Start: 155001, End: 480000, Func: "count_over_time", Step: 1000},
},
}, {
// When the @ is on the vector selector, the enclosing subquery parameters
// don't affect the hint ranges.
query: "count_over_time((foo @ 200 offset 10s)[2m:1s] offset 10s)", start: 300000, end: 500000,
expected: []*storage.SelectHints{
{Start: 185001, End: 190000, Func: "count_over_time", Step: 1000},
},
}, {
// When the @ is on the vector selector, the enclosing subquery parameters
// don't affect the hint ranges.
query: "count_over_time((foo @ 200 offset 10s)[2m:1s] @ 100 offset 10s)", start: 300000, end: 500000,
expected: []*storage.SelectHints{
{Start: 185001, End: 190000, Func: "count_over_time", Step: 1000},
},
}, {
query: "count_over_time((foo offset 10s)[2m:1s] @ 100 offset 10s)", start: 300000, end: 500000,
expected: []*storage.SelectHints{
{Start: -44999, End: 80000, Func: "count_over_time", Step: 1000},
},
}, {
query: "sum by (dim1) (foo)", start: 10000,
expected: []*storage.SelectHints{
{Start: 5001, End: 10000, Func: "sum", By: true, Grouping: []string{"dim1"}},
},
}, {
query: "sum without (dim1) (foo)", start: 10000,
expected: []*storage.SelectHints{
{Start: 5001, End: 10000, Func: "sum", Grouping: []string{"dim1"}},
},
}, {
query: "sum by (dim1) (avg_over_time(foo[1s]))", start: 10000,
expected: []*storage.SelectHints{
{Start: 9001, End: 10000, Func: "avg_over_time", Range: 1000},
},
}, {
query: "sum by (dim1) (max by (dim2) (foo))", start: 10000,
expected: []*storage.SelectHints{
{Start: 5001, End: 10000, Func: "max", By: true, Grouping: []string{"dim2"}},
},
}, {
query: "(max by (dim1) (foo))[5s:1s]", start: 10000,
expected: []*storage.SelectHints{
{Start: 1, End: 10000, Func: "max", By: true, Grouping: []string{"dim1"}, Step: 1000},
},
}, {
query: "(sum(http_requests{group=~\"p.*\"})+max(http_requests{group=~\"c.*\"}))[20s:5s]", start: 120000,
expected: []*storage.SelectHints{
{Start: 95001, End: 120000, Func: "sum", By: true, Step: 5000},
{Start: 95001, End: 120000, Func: "max", By: true, Step: 5000},
},
}, {
query: "foo @ 50 + bar @ 250 + baz @ 900", start: 100000, end: 500000,
expected: []*storage.SelectHints{
{Start: 45001, End: 50000, Step: 1000},
{Start: 245001, End: 250000, Step: 1000},
{Start: 895001, End: 900000, Step: 1000},
},
}, {
query: "foo @ 50 + bar + baz @ 900", start: 100000, end: 500000,
expected: []*storage.SelectHints{
{Start: 45001, End: 50000, Step: 1000},
{Start: 95001, End: 500000, Step: 1000},
{Start: 895001, End: 900000, Step: 1000},
},
}, {
query: "rate(foo[2s] @ 50) + bar @ 250 + baz @ 900", start: 100000, end: 500000,
expected: []*storage.SelectHints{
{Start: 48001, End: 50000, Step: 1000, Func: "rate", Range: 2000},
{Start: 245001, End: 250000, Step: 1000},
{Start: 895001, End: 900000, Step: 1000},
},
}, {
query: "rate(foo[2s:1s] @ 50) + bar + baz", start: 100000, end: 500000,
expected: []*storage.SelectHints{
{Start: 43001, End: 50000, Step: 1000, Func: "rate"},
{Start: 95001, End: 500000, Step: 1000},
{Start: 95001, End: 500000, Step: 1000},
},
}, {
query: "rate(foo[2s:1s] @ 50) + bar + rate(baz[2m:1s] @ 900 offset 2m) ", start: 100000, end: 500000,
expected: []*storage.SelectHints{
{Start: 43001, End: 50000, Step: 1000, Func: "rate"},
{Start: 95001, End: 500000, Step: 1000},
{Start: 655001, End: 780000, Step: 1000, Func: "rate"},
},
}, { // Hints are based on the inner most subquery timestamp.
query: `sum_over_time(sum_over_time(metric{job="1"}[100s])[100s:25s] @ 50)[3s:1s] @ 3000`, start: 100000,
expected: []*storage.SelectHints{
{Start: -149999, End: 50000, Range: 100000, Func: "sum_over_time", Step: 25000},
},
}, { // Hints are based on the inner most subquery timestamp.
query: `sum_over_time(sum_over_time(metric{job="1"}[100s])[100s:25s] @ 3000)[3s:1s] @ 50`,
expected: []*storage.SelectHints{
{Start: 2800001, End: 3000000, Range: 100000, Func: "sum_over_time", Step: 25000},
},
},
} {
t.Run(tc.query, func(t *testing.T) {
engine := promqltest.NewTestEngineWithOpts(t, opts)
hintsRecorder := &noopHintRecordingQueryable{}
var (
query promql.Query
err error
)
ctx := context.Background()
if tc.end == 0 {
query, err = engine.NewInstantQuery(ctx, hintsRecorder, nil, tc.query, timestamp.Time(tc.start))
} else {
query, err = engine.NewRangeQuery(ctx, hintsRecorder, nil, tc.query, timestamp.Time(tc.start), timestamp.Time(tc.end), time.Second)
}
require.NoError(t, err)
res := query.Exec(context.Background())
require.NoError(t, res.Err)
require.Equal(t, tc.expected, hintsRecorder.hints)
})
}
}
func TestEngineShutdown(t *testing.T) {
opts := promql.EngineOpts{
Logger: nil,
Reg: nil,
MaxSamples: 10,
Timeout: 10 * time.Second,
}
engine := promqltest.NewTestEngineWithOpts(t, opts)
ctx, cancelCtx := context.WithCancel(context.Background())
block := make(chan struct{})
processing := make(chan struct{})
// Shutdown engine on first handler execution. Should handler execution ever become
// concurrent this test has to be adjusted accordingly.
f := func(ctx context.Context) error {
processing <- struct{}{}
<-block
return contextDone(ctx, "test statement execution")
}
query1 := engine.NewTestQuery(f)
// Stopping the engine must cancel the base context. While executing queries is
// still possible, their context is canceled from the beginning and execution should
// terminate immediately.
var res *promql.Result
go func() {
res = query1.Exec(ctx)
processing <- struct{}{}
}()
<-processing
cancelCtx()
block <- struct{}{}
<-processing
require.Error(t, res.Err, "expected error on shutdown during query but got none")
require.Equal(t, errQueryCanceled, res.Err)
query2 := engine.NewTestQuery(func(context.Context) error {
require.FailNow(t, "reached query execution unexpectedly")
return nil
})
// The second query is started after the engine shut down. It must
// be canceled immediately.
res2 := query2.Exec(ctx)
require.Error(t, res2.Err, "expected error on querying with canceled context but got none")
var e promql.ErrQueryCanceled
require.ErrorAs(t, res2.Err, &e, "expected cancellation error but got: %s", res2.Err)
}
func TestEngineEvalStmtTimestamps(t *testing.T) {
storage := promqltest.LoadedStorage(t, `
load 10s
metric 1 2
`)
t.Cleanup(func() { storage.Close() })
cases := []struct {
Query string
Result parser.Value
Start time.Time
End time.Time
Interval time.Duration
ShouldError bool
}{
// Instant queries.
{
Query: "1",
Result: promql.Scalar{V: 1, T: 1000},
Start: time.Unix(1, 0),
},
{
Query: "metric",
Result: promql.Vector{
promql.Sample{
promql: Separate `Point` into `FPoint` and `HPoint` In other words: Instead of having a “polymorphous” `Point` that can either contain a float value or a histogram value, use an `FPoint` for floats and an `HPoint` for histograms. This seemingly small change has a _lot_ of repercussions throughout the codebase. The idea here is to avoid the increase in size of `Point` arrays that happened after native histograms had been added. The higher-level data structures (`Sample`, `Series`, etc.) are still “polymorphous”. The same idea could be applied to them, but at each step the trade-offs needed to be evaluated. The idea with this change is to do the minimum necessary to get back to pre-histogram performance for functions that do not touch histograms. Here are comparisons for the `changes` function. The test data doesn't include histograms yet. Ideally, there would be no change in the benchmark result at all. First runtime v2.39 compared to directly prior to this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10) ``` And then runtime v2.39 compared to after this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9) ``` In summary, the runtime doesn't really improve with this change for queries with just a few steps. For queries with many steps, this commit essentially reinstates the old performance. This is good because the many-step queries are the one that matter most (longest absolute runtime). In terms of allocations, though, this commit doesn't make a dent at all (numbers not shown). The reason is that most of the allocations happen in the sampleRingIterator (in the storage package), which has to be addressed in a separate commit. Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
F: 1,
T: 1000,
Metric: labels.FromStrings("__name__", "metric"),
},
},
Start: time.Unix(1, 0),
},
{
Query: "metric[20s]",
Result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1, T: 0}, {F: 2, T: 10000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
Start: time.Unix(10, 0),
},
// Range queries.
{
Query: "1",
Result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 1000}, {F: 1, T: 2000}},
Metric: labels.EmptyLabels(),
},
},
Start: time.Unix(0, 0),
End: time.Unix(2, 0),
Interval: time.Second,
},
{
Query: "metric",
Result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 1000}, {F: 1, T: 2000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
Start: time.Unix(0, 0),
End: time.Unix(2, 0),
Interval: time.Second,
},
{
Query: "metric",
Result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
Start: time.Unix(0, 0),
End: time.Unix(10, 0),
Interval: 5 * time.Second,
},
{
Query: `count_values("wrong label!\xff", metric)`,
ShouldError: true,
},
}
for i, c := range cases {
t.Run(fmt.Sprintf("%d query=%s", i, c.Query), func(t *testing.T) {
var err error
var qry promql.Query
engine := newTestEngine(t)
if c.Interval == 0 {
qry, err = engine.NewInstantQuery(context.Background(), storage, nil, c.Query, c.Start)
} else {
qry, err = engine.NewRangeQuery(context.Background(), storage, nil, c.Query, c.Start, c.End, c.Interval)
}
require.NoError(t, err)
res := qry.Exec(context.Background())
if c.ShouldError {
require.Error(t, res.Err, "expected error for the query %q", c.Query)
return
}
require.NoError(t, res.Err)
require.Equal(t, c.Result, res.Value, "query %q failed", c.Query)
})
}
}
func TestQueryStatistics(t *testing.T) {
storage := promqltest.LoadedStorage(t, `
load 10s
metricWith1SampleEvery10Seconds 1+1x100
metricWith3SampleEvery10Seconds{a="1",b="1"} 1+1x100
metricWith3SampleEvery10Seconds{a="2",b="2"} 1+1x100
metricWith3SampleEvery10Seconds{a="3",b="2"} 1+1x100
metricWith1HistogramEvery10Seconds {{schema:1 count:5 sum:20 buckets:[1 2 1 1]}}+{{schema:1 count:10 sum:5 buckets:[1 2 3 4]}}x100
`)
t.Cleanup(func() { storage.Close() })
cases := []struct {
Query string
SkipMaxCheck bool
TotalSamples int64
TotalSamplesPerStep stats.TotalSamplesPerStep
PeakSamples int
Start time.Time
End time.Time
Interval time.Duration
}{
{
Query: `"literal string"`,
SkipMaxCheck: true, // This can't fail from a max samples limit.
Start: time.Unix(21, 0),
TotalSamples: 0,
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 0,
},
},
{
Query: "1",
Start: time.Unix(21, 0),
TotalSamples: 0,
PeakSamples: 1,
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 0,
},
},
{
Query: "metricWith1SampleEvery10Seconds",
Start: time.Unix(21, 0),
PeakSamples: 1,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
Query: "metricWith1HistogramEvery10Seconds",
Start: time.Unix(21, 0),
PeakSamples: 13,
TotalSamples: 13, // 1 histogram HPoint of size 13 / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 13,
},
},
{
// timestamp function has a special handling.
Query: "timestamp(metricWith1SampleEvery10Seconds)",
Start: time.Unix(21, 0),
PeakSamples: 2,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
Query: "timestamp(metricWith1HistogramEvery10Seconds)",
Start: time.Unix(21, 0),
PeakSamples: 2,
TotalSamples: 1, // 1 float sample (because of timestamp) / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
Query: "metricWith1SampleEvery10Seconds",
Start: time.Unix(22, 0),
PeakSamples: 1,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
22000: 1, // Aligned to the step time, not the sample time.
},
},
{
Query: "metricWith1SampleEvery10Seconds offset 10s",
Start: time.Unix(21, 0),
PeakSamples: 1,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
Query: "metricWith1SampleEvery10Seconds @ 15",
Start: time.Unix(21, 0),
PeakSamples: 1,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
Query: `metricWith3SampleEvery10Seconds{a="1"}`,
Start: time.Unix(21, 0),
PeakSamples: 1,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
Query: `metricWith3SampleEvery10Seconds{a="1"} @ 19`,
Start: time.Unix(21, 0),
PeakSamples: 1,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
Query: `metricWith3SampleEvery10Seconds{a="1"}[20s] @ 19`,
Start: time.Unix(21, 0),
PeakSamples: 2,
TotalSamples: 2, // (1 sample / 10 seconds) * 20s
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 2,
},
},
{
Query: "metricWith3SampleEvery10Seconds",
Start: time.Unix(21, 0),
PeakSamples: 3,
TotalSamples: 3, // 3 samples / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 3,
},
},
{
Query: "metricWith1SampleEvery10Seconds[60s]",
Start: time.Unix(201, 0),
PeakSamples: 6,
TotalSamples: 6, // 1 sample / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 6,
},
},
{
Query: "metricWith1HistogramEvery10Seconds[60s]",
Start: time.Unix(201, 0),
PeakSamples: 78,
TotalSamples: 78, // 1 histogram (size 13 HPoint) / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 78,
},
},
{
Query: "max_over_time(metricWith1SampleEvery10Seconds[60s])[20s:5s]",
Start: time.Unix(201, 0),
PeakSamples: 10,
TotalSamples: 24, // (1 sample / 10 seconds * 60 seconds) * 4
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 24,
},
},
{
Query: "max_over_time(metricWith1SampleEvery10Seconds[61s])[20s:5s]",
Start: time.Unix(201, 0),
PeakSamples: 11,
TotalSamples: 26, // (1 sample / 10 seconds * 60 seconds) * 4 + 2 as
// max_over_time(metricWith1SampleEvery10Seconds[61s]) @ 190 and 200 will return 7 samples.
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 26,
},
},
{
Query: "max_over_time(metricWith1HistogramEvery10Seconds[60s])[20s:5s]",
Start: time.Unix(201, 0),
PeakSamples: 78,
TotalSamples: 312, // (1 histogram (size 13) / 10 seconds * 60 seconds) * 4
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 312,
},
},
{
Query: "metricWith1SampleEvery10Seconds[60s] @ 30",
Start: time.Unix(201, 0),
PeakSamples: 4,
TotalSamples: 4, // @ modifier force the evaluation to at 30 seconds - So it brings 4 datapoints (0, 10, 20, 30 seconds) * 1 series
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 4,
},
},
{
Query: "metricWith1HistogramEvery10Seconds[60s] @ 30",
Start: time.Unix(201, 0),
PeakSamples: 52,
TotalSamples: 52, // @ modifier force the evaluation to at 30 seconds - So it brings 4 datapoints (0, 10, 20, 30 seconds) * 1 series
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 52,
},
},
{
Query: "sum(max_over_time(metricWith3SampleEvery10Seconds[60s] @ 30))",
Start: time.Unix(201, 0),
PeakSamples: 7,
TotalSamples: 12, // @ modifier force the evaluation to at 30 seconds - So it brings 4 datapoints (0, 10, 20, 30 seconds) * 3 series
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
},
},
{
Query: "sum by (b) (max_over_time(metricWith3SampleEvery10Seconds[60s] @ 30))",
Start: time.Unix(201, 0),
PeakSamples: 7,
TotalSamples: 12, // @ modifier force the evaluation to at 30 seconds - So it brings 4 datapoints (0, 10, 20, 30 seconds) * 3 series
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
},
},
{
Query: "metricWith1SampleEvery10Seconds[60s] offset 10s",
Start: time.Unix(201, 0),
PeakSamples: 6,
TotalSamples: 6, // 1 sample / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 6,
},
},
{
Query: "metricWith3SampleEvery10Seconds[60s]",
Start: time.Unix(201, 0),
PeakSamples: 18,
TotalSamples: 18, // 3 sample / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 18,
},
},
{
Query: "max_over_time(metricWith1SampleEvery10Seconds[60s])",
Start: time.Unix(201, 0),
PeakSamples: 7,
TotalSamples: 6, // 1 sample / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 6,
},
},
{
Query: "absent_over_time(metricWith1SampleEvery10Seconds[60s])",
Start: time.Unix(201, 0),
PeakSamples: 7,
TotalSamples: 6, // 1 sample / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 6,
},
},
{
Query: "max_over_time(metricWith3SampleEvery10Seconds[60s])",
Start: time.Unix(201, 0),
PeakSamples: 9,
TotalSamples: 18, // 3 sample / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 18,
},
},
{
Query: "metricWith1SampleEvery10Seconds[60s:5s]",
Start: time.Unix(201, 0),
PeakSamples: 12,
TotalSamples: 12, // 1 sample per query * 12 queries (60/5)
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
},
},
{
Query: "metricWith1SampleEvery10Seconds[60s:5s] offset 10s",
Start: time.Unix(201, 0),
PeakSamples: 12,
TotalSamples: 12, // 1 sample per query * 12 queries (60/5)
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
},
},
{
Query: "max_over_time(metricWith3SampleEvery10Seconds[60s:5s])",
Start: time.Unix(201, 0),
PeakSamples: 51,
TotalSamples: 36, // 3 sample per query * 12 queries (60/5)
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 36,
},
},
{
Query: "sum(max_over_time(metricWith3SampleEvery10Seconds[60s:5s])) + sum(max_over_time(metricWith3SampleEvery10Seconds[60s:5s]))",
Start: time.Unix(201, 0),
PeakSamples: 52,
TotalSamples: 72, // 2 * (3 sample per query * 12 queries (60/5))
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 72,
},
},
{
Query: `metricWith3SampleEvery10Seconds{a="1"}`,
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 4,
TotalSamples: 4, // 1 sample per query * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 1,
206000: 1,
211000: 1,
216000: 1,
},
},
{
Query: `metricWith3SampleEvery10Seconds{a="1"}`,
Start: time.Unix(204, 0),
End: time.Unix(223, 0),
Interval: 5 * time.Second,
PeakSamples: 4,
TotalSamples: 4, // 1 sample per query * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
204000: 1, // aligned to the step time, not the sample time
209000: 1,
214000: 1,
219000: 1,
},
},
{
Query: `metricWith1HistogramEvery10Seconds`,
Start: time.Unix(204, 0),
End: time.Unix(223, 0),
Interval: 5 * time.Second,
PeakSamples: 52,
TotalSamples: 52, // 1 histogram (size 13 HPoint) per query * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
204000: 13, // aligned to the step time, not the sample time
209000: 13,
214000: 13,
219000: 13,
},
},
{
// timestamp function has a special handling
Query: "timestamp(metricWith1SampleEvery10Seconds)",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 5,
TotalSamples: 4, // 1 sample per query * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 1,
206000: 1,
211000: 1,
216000: 1,
},
},
{
// timestamp function has a special handling
Query: "timestamp(metricWith1HistogramEvery10Seconds)",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 5,
TotalSamples: 4, // 1 sample per query * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 1,
206000: 1,
211000: 1,
216000: 1,
},
},
{
Query: `max_over_time(metricWith3SampleEvery10Seconds{a="1"}[10s])`,
Start: time.Unix(991, 0),
End: time.Unix(1021, 0),
Interval: 10 * time.Second,
PeakSamples: 2,
TotalSamples: 2, // 1 sample per query * 2 steps with data
TotalSamplesPerStep: stats.TotalSamplesPerStep{
991000: 1,
1001000: 1,
1011000: 0,
1021000: 0,
},
},
{
Query: `metricWith3SampleEvery10Seconds{a="1"} offset 10s`,
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 4,
TotalSamples: 4, // 1 sample per query * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 1,
206000: 1,
211000: 1,
216000: 1,
},
},
{
Query: "max_over_time(metricWith3SampleEvery10Seconds[60s] @ 30)",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 12,
TotalSamples: 48, // @ modifier force the evaluation timestamp at 30 seconds - So it brings 4 datapoints (0, 10, 20, 30 seconds) * 3 series * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
206000: 12,
211000: 12,
216000: 12,
},
},
{
Query: `metricWith3SampleEvery10Seconds`,
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
PeakSamples: 12,
Interval: 5 * time.Second,
TotalSamples: 12, // 3 sample per query * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 3,
206000: 3,
211000: 3,
216000: 3,
},
},
{
Query: `max_over_time(metricWith3SampleEvery10Seconds[60s])`,
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 18,
TotalSamples: 72, // (3 sample / 10 seconds * 60 seconds) * 4 steps = 72
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 18,
206000: 18,
211000: 18,
216000: 18,
},
},
{
Query: "max_over_time(metricWith3SampleEvery10Seconds[60s:5s])",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 72,
TotalSamples: 144, // 3 sample per query * 12 queries (60/5) * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 36,
206000: 36,
211000: 36,
216000: 36,
},
},
{
Query: "max_over_time(metricWith1SampleEvery10Seconds[60s:5s])",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 32,
TotalSamples: 48, // 1 sample per query * 12 queries (60/5) * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
206000: 12,
211000: 12,
216000: 12,
},
},
{
Query: "sum by (b) (max_over_time(metricWith1SampleEvery10Seconds[60s:5s]))",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 32,
TotalSamples: 48, // 1 sample per query * 12 queries (60/5) * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
206000: 12,
211000: 12,
216000: 12,
},
},
{
Query: "sum(max_over_time(metricWith3SampleEvery10Seconds[60s:5s])) + sum(max_over_time(metricWith3SampleEvery10Seconds[60s:5s]))",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 76,
TotalSamples: 288, // 2 * (3 sample per query * 12 queries (60/5) * 4 steps)
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 72,
206000: 72,
211000: 72,
216000: 72,
},
},
{
Query: "sum(max_over_time(metricWith3SampleEvery10Seconds[60s:5s])) + sum(max_over_time(metricWith1SampleEvery10Seconds[60s:5s]))",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 72,
TotalSamples: 192, // (1 sample per query * 12 queries (60/5) + 3 sample per query * 12 queries (60/5)) * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 48,
206000: 48,
211000: 48,
216000: 48,
},
},
}
for _, c := range cases {
t.Run(c.Query, func(t *testing.T) {
opts := promql.NewPrometheusQueryOpts(true, 0)
engine := promqltest.NewTestEngine(t, true, 0, promqltest.DefaultMaxSamplesPerQuery)
runQuery := func(expErr error) *stats.Statistics {
var err error
var qry promql.Query
if c.Interval == 0 {
qry, err = engine.NewInstantQuery(context.Background(), storage, opts, c.Query, c.Start)
} else {
qry, err = engine.NewRangeQuery(context.Background(), storage, opts, c.Query, c.Start, c.End, c.Interval)
}
require.NoError(t, err)
res := qry.Exec(context.Background())
require.Equal(t, expErr, res.Err)
return qry.Stats()
}
stats := runQuery(nil)
require.Equal(t, c.TotalSamples, stats.Samples.TotalSamples, "Total samples mismatch")
require.Equal(t, &c.TotalSamplesPerStep, stats.Samples.TotalSamplesPerStepMap(), "Total samples per time mismatch")
require.Equal(t, c.PeakSamples, stats.Samples.PeakSamples, "Peak samples mismatch")
// Check that the peak is correct by setting the max to one less.
if c.SkipMaxCheck {
return
}
engine = promqltest.NewTestEngine(t, true, 0, stats.Samples.PeakSamples-1)
runQuery(promql.ErrTooManySamples(env))
})
}
}
func TestMaxQuerySamples(t *testing.T) {
storage := promqltest.LoadedStorage(t, `
load 10s
metric 1+1x100
bigmetric{a="1"} 1+1x100
bigmetric{a="2"} 1+1x100
`)
t.Cleanup(func() { storage.Close() })
// These test cases should be touching the limit exactly (hence no exceeding).
// Exceeding the limit will be tested by doing -1 to the MaxSamples.
cases := []struct {
Query string
MaxSamples int
Start time.Time
End time.Time
Interval time.Duration
}{
// Instant queries.
{
Query: "1",
MaxSamples: 1,
Start: time.Unix(1, 0),
},
{
Query: "metric",
MaxSamples: 1,
Start: time.Unix(1, 0),
},
{
Query: "metric[20s]",
MaxSamples: 2,
Start: time.Unix(10, 0),
},
{
Query: "rate(metric[20s])",
MaxSamples: 3,
Start: time.Unix(10, 0),
},
{
Query: "metric[20s:5s]",
MaxSamples: 3,
Start: time.Unix(10, 0),
},
{
Query: "metric[20s] @ 10",
MaxSamples: 2,
Start: time.Unix(0, 0),
},
// Range queries.
{
Query: "1",
MaxSamples: 3,
Start: time.Unix(0, 0),
End: time.Unix(2, 0),
Interval: time.Second,
},
{
Query: "1",
MaxSamples: 3,
Start: time.Unix(0, 0),
End: time.Unix(2, 0),
Interval: time.Second,
},
{
Query: "metric",
MaxSamples: 3,
Start: time.Unix(0, 0),
End: time.Unix(2, 0),
Interval: time.Second,
},
{
Query: "metric",
MaxSamples: 3,
Start: time.Unix(0, 0),
End: time.Unix(10, 0),
Interval: 5 * time.Second,
},
{
Query: "rate(bigmetric[1s])",
MaxSamples: 1,
Start: time.Unix(0, 0),
End: time.Unix(10, 0),
Interval: 5 * time.Second,
},
{
// Result is duplicated, so @ also produces 3 samples.
Query: "metric @ 10",
MaxSamples: 3,
Start: time.Unix(0, 0),
End: time.Unix(10, 0),
Interval: 5 * time.Second,
},
{
// The peak samples in memory is during the first evaluation:
// - Subquery takes 20 samples, 10 for each bigmetric.
// - Result is calculated per series where the series samples is buffered, hence 10 more here.
// - The result of two series is added before the last series buffer is discarded, so 2 more here.
// Hence at peak it is 20 (subquery) + 10 (buffer of a series) + 2 (result from 2 series).
// The subquery samples and the buffer is discarded before duplicating.
Query: `rate(bigmetric[10s:1s] @ 10)`,
MaxSamples: 32,
Start: time.Unix(0, 0),
End: time.Unix(10, 0),
Interval: 5 * time.Second,
},
{
// Here the reasoning is same as above. But LHS and RHS are done one after another.
// So while one of them takes 32 samples at peak, we need to hold the 2 sample
// result of the other till then.
Query: `rate(bigmetric[10s:1s] @ 10) + rate(bigmetric[10s:1s] @ 30)`,
MaxSamples: 34,
Start: time.Unix(0, 0),
End: time.Unix(10, 0),
Interval: 5 * time.Second,
},
{
// promql.Sample as above but with only 1 part as step invariant.
// Here the peak is caused by the non-step invariant part as it touches more time range.
// Hence at peak it is 2*20 (subquery from 0s to 20s)
// + 10 (buffer of a series per evaluation)
// + 6 (result from 2 series at 3 eval times).
Query: `rate(bigmetric[10s:1s]) + rate(bigmetric[10s:1s] @ 30)`,
MaxSamples: 56,
Start: time.Unix(10, 0),
End: time.Unix(20, 0),
Interval: 5 * time.Second,
},
{
// Nested subquery.
// We saw that innermost rate takes 32 samples which is still the peak
// since the other two subqueries just duplicate the result.
Query: `rate(rate(bigmetric[10:1s] @ 10)[100s:25s] @ 1000)[100s:20s] @ 2000`,
MaxSamples: 32,
Start: time.Unix(10, 0),
},
{
// Nested subquery.
// Now the outermost subquery produces more samples than innermost rate.
Query: `rate(rate(bigmetric[10s:1s] @ 10)[100s:25s] @ 1000)[17s:1s] @ 2000`,
MaxSamples: 34,
Start: time.Unix(10, 0),
},
}
for _, c := range cases {
t.Run(c.Query, func(t *testing.T) {
engine := newTestEngine(t)
testFunc := func(expError error) {
var err error
var qry promql.Query
if c.Interval == 0 {
qry, err = engine.NewInstantQuery(context.Background(), storage, nil, c.Query, c.Start)
} else {
qry, err = engine.NewRangeQuery(context.Background(), storage, nil, c.Query, c.Start, c.End, c.Interval)
}
require.NoError(t, err)
res := qry.Exec(context.Background())
stats := qry.Stats()
require.Equal(t, expError, res.Err)
require.NotNil(t, stats)
if expError == nil {
require.Equal(t, c.MaxSamples, stats.Samples.PeakSamples, "peak samples mismatch for query %q", c.Query)
}
}
// Within limit.
engine = promqltest.NewTestEngine(t, false, 0, c.MaxSamples)
testFunc(nil)
// Exceeding limit.
engine = promqltest.NewTestEngine(t, false, 0, c.MaxSamples-1)
testFunc(promql.ErrTooManySamples(env))
})
}
}
func TestAtModifier(t *testing.T) {
engine := newTestEngine(t)
storage := promqltest.LoadedStorage(t, `
load 10s
metric{job="1"} 0+1x1000
metric{job="2"} 0+2x1000
metric_topk{instance="1"} 0+1x1000
metric_topk{instance="2"} 0+2x1000
metric_topk{instance="3"} 1000-1x1000
load 1ms
metric_ms 0+1x10000
`)
t.Cleanup(func() { storage.Close() })
lbls1 := labels.FromStrings("__name__", "metric", "job", "1")
lbls2 := labels.FromStrings("__name__", "metric", "job", "2")
lblstopk2 := labels.FromStrings("__name__", "metric_topk", "instance", "2")
lblstopk3 := labels.FromStrings("__name__", "metric_topk", "instance", "3")
lblsms := labels.FromStrings("__name__", "metric_ms")
lblsneg := labels.FromStrings("__name__", "metric_neg")
// Add some samples with negative timestamp.
db := storage.DB
app := db.Appender(context.Background())
ref, err := app.Append(0, lblsneg, -1000000, 1000)
require.NoError(t, err)
for ts := int64(-1000000 + 1000); ts <= 0; ts += 1000 {
_, err := app.Append(ref, labels.EmptyLabels(), ts, -float64(ts/1000)+1)
require.NoError(t, err)
}
// To test the fix for https://github.com/prometheus/prometheus/issues/8433.
_, err = app.Append(0, labels.FromStrings("__name__", "metric_timestamp"), 3600*1000, 1000)
require.NoError(t, err)
require.NoError(t, app.Commit())
cases := []struct {
query string
start, end, interval int64 // Time in seconds.
result parser.Value
}{
{ // Time of the result is the evaluation time.
query: `metric_neg @ 0`,
start: 100,
result: promql.Vector{
promql.Sample{F: 1, T: 100000, Metric: lblsneg},
},
}, {
query: `metric_neg @ -200`,
start: 100,
result: promql.Vector{
promql.Sample{F: 201, T: 100000, Metric: lblsneg},
},
}, {
query: `metric{job="2"} @ 50`,
start: -2, end: 2, interval: 1,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 10, T: -2000}, {F: 10, T: -1000}, {F: 10, T: 0}, {F: 10, T: 1000}, {F: 10, T: 2000}},
Metric: lbls2,
},
},
}, { // Timestamps for matrix selector does not depend on the evaluation time.
query: "metric[20s] @ 300",
start: 10,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 29, T: 290000}, {F: 30, T: 300000}},
Metric: lbls1,
},
promql.Series{
Floats: []promql.FPoint{{F: 58, T: 290000}, {F: 60, T: 300000}},
Metric: lbls2,
},
},
}, {
query: `metric_neg[2s] @ 0`,
start: 100,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 2, T: -1000}, {F: 1, T: 0}},
Metric: lblsneg,
},
},
}, {
query: `metric_neg[3s] @ -500`,
start: 100,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 503, T: -502000}, {F: 502, T: -501000}, {F: 501, T: -500000}},
Metric: lblsneg,
},
},
}, {
query: `metric_ms[3ms] @ 2.345`,
start: 100,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 2343, T: 2343}, {F: 2344, T: 2344}, {F: 2345, T: 2345}},
Metric: lblsms,
},
},
}, {
query: "metric[100s:25s] @ 300",
start: 100,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 22, T: 225000}, {F: 25, T: 250000}, {F: 27, T: 275000}, {F: 30, T: 300000}},
Metric: lbls1,
},
promql.Series{
Floats: []promql.FPoint{{F: 44, T: 225000}, {F: 50, T: 250000}, {F: 54, T: 275000}, {F: 60, T: 300000}},
Metric: lbls2,
},
},
}, {
query: "metric[100s1ms:25s] @ 300", // Add 1ms to the range to see the legacy behavior of the previous test.
start: 100,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 20, T: 200000}, {F: 22, T: 225000}, {F: 25, T: 250000}, {F: 27, T: 275000}, {F: 30, T: 300000}},
Metric: lbls1,
},
promql.Series{
Floats: []promql.FPoint{{F: 40, T: 200000}, {F: 44, T: 225000}, {F: 50, T: 250000}, {F: 54, T: 275000}, {F: 60, T: 300000}},
Metric: lbls2,
},
},
}, {
query: "metric_neg[50s:25s] @ 0",
start: 100,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 26, T: -25000}, {F: 1, T: 0}},
Metric: lblsneg,
},
},
}, {
query: "metric_neg[50s1ms:25s] @ 0", // Add 1ms to the range to see the legacy behavior of the previous test.
start: 100,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 51, T: -50000}, {F: 26, T: -25000}, {F: 1, T: 0}},
Metric: lblsneg,
},
},
}, {
query: "metric_neg[50s:25s] @ -100",
start: 100,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 126, T: -125000}, {F: 101, T: -100000}},
Metric: lblsneg,
},
},
}, {
query: "metric_neg[50s1ms:25s] @ -100", // Add 1ms to the range to see the legacy behavior of the previous test.
start: 100,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 151, T: -150000}, {F: 126, T: -125000}, {F: 101, T: -100000}},
Metric: lblsneg,
},
},
}, {
query: `metric_ms[101ms:25ms] @ 2.345`,
start: 100,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 2250, T: 2250}, {F: 2275, T: 2275}, {F: 2300, T: 2300}, {F: 2325, T: 2325}},
Metric: lblsms,
},
},
}, {
query: `metric_topk and topk(1, sum_over_time(metric_topk[50s] @ 100))`,
start: 50, end: 80, interval: 10,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 995, T: 50000}, {F: 994, T: 60000}, {F: 993, T: 70000}, {F: 992, T: 80000}},
Metric: lblstopk3,
},
},
}, {
query: `metric_topk and topk(1, sum_over_time(metric_topk[50s] @ 5000))`,
start: 50, end: 80, interval: 10,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 10, T: 50000}, {F: 12, T: 60000}, {F: 14, T: 70000}, {F: 16, T: 80000}},
Metric: lblstopk2,
},
},
}, {
query: `metric_topk and topk(1, sum_over_time(metric_topk[50s] @ end()))`,
start: 70, end: 100, interval: 10,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 993, T: 70000}, {F: 992, T: 80000}, {F: 991, T: 90000}, {F: 990, T: 100000}},
Metric: lblstopk3,
},
},
}, {
query: `metric_topk and topk(1, sum_over_time(metric_topk[50s] @ start()))`,
start: 100, end: 130, interval: 10,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 990, T: 100000}, {F: 989, T: 110000}, {F: 988, T: 120000}, {F: 987, T: 130000}},
Metric: lblstopk3,
},
},
}, {
// Tests for https://github.com/prometheus/prometheus/issues/8433.
// The trick here is that the query range should be > lookback delta.
query: `timestamp(metric_timestamp @ 3600)`,
start: 0, end: 7 * 60, interval: 60,
result: promql.Matrix{
promql.Series{
Floats: []promql.FPoint{
promql: Separate `Point` into `FPoint` and `HPoint` In other words: Instead of having a “polymorphous” `Point` that can either contain a float value or a histogram value, use an `FPoint` for floats and an `HPoint` for histograms. This seemingly small change has a _lot_ of repercussions throughout the codebase. The idea here is to avoid the increase in size of `Point` arrays that happened after native histograms had been added. The higher-level data structures (`Sample`, `Series`, etc.) are still “polymorphous”. The same idea could be applied to them, but at each step the trade-offs needed to be evaluated. The idea with this change is to do the minimum necessary to get back to pre-histogram performance for functions that do not touch histograms. Here are comparisons for the `changes` function. The test data doesn't include histograms yet. Ideally, there would be no change in the benchmark result at all. First runtime v2.39 compared to directly prior to this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 542µs ± 1% +38.58% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 617µs ± 2% +36.48% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.36ms ± 2% +21.58% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 8.94ms ± 1% +14.21% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.30ms ± 1% +10.67% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.10ms ± 1% +11.82% (p=0.000 n=10+10) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 11.8ms ± 1% +12.50% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 87.4ms ± 1% +12.63% (p=0.000 n=9+9) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 32.8ms ± 1% +8.01% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.6ms ± 2% +9.64% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 117ms ± 1% +11.69% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 876ms ± 1% +11.83% (p=0.000 n=9+10) ``` And then runtime v2.39 compared to after this commit: ``` name old time/op new time/op delta RangeQuery/expr=changes(a_one[1d]),steps=1-16 391µs ± 2% 547µs ± 1% +39.84% (p=0.000 n=9+8) RangeQuery/expr=changes(a_one[1d]),steps=10-16 452µs ± 2% 616µs ± 2% +36.15% (p=0.000 n=10+10) RangeQuery/expr=changes(a_one[1d]),steps=100-16 1.12ms ± 1% 1.26ms ± 1% +12.20% (p=0.000 n=8+10) RangeQuery/expr=changes(a_one[1d]),steps=1000-16 7.83ms ± 1% 7.95ms ± 1% +1.59% (p=0.000 n=10+8) RangeQuery/expr=changes(a_ten[1d]),steps=1-16 2.98ms ± 0% 3.38ms ± 2% +13.49% (p=0.000 n=9+10) RangeQuery/expr=changes(a_ten[1d]),steps=10-16 3.66ms ± 1% 4.02ms ± 1% +9.80% (p=0.000 n=10+9) RangeQuery/expr=changes(a_ten[1d]),steps=100-16 10.5ms ± 0% 10.8ms ± 1% +3.08% (p=0.000 n=8+10) RangeQuery/expr=changes(a_ten[1d]),steps=1000-16 77.6ms ± 1% 78.1ms ± 1% +0.58% (p=0.035 n=9+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1-16 30.4ms ± 2% 33.5ms ± 4% +10.18% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=10-16 37.1ms ± 2% 40.0ms ± 1% +7.98% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=100-16 105ms ± 1% 107ms ± 1% +1.92% (p=0.000 n=10+10) RangeQuery/expr=changes(a_hundred[1d]),steps=1000-16 783ms ± 3% 775ms ± 1% -1.02% (p=0.019 n=9+9) ``` In summary, the runtime doesn't really improve with this change for queries with just a few steps. For queries with many steps, this commit essentially reinstates the old performance. This is good because the many-step queries are the one that matter most (longest absolute runtime). In terms of allocations, though, this commit doesn't make a dent at all (numbers not shown). The reason is that most of the allocations happen in the sampleRingIterator (in the storage package), which has to be addressed in a separate commit. Signed-off-by: beorn7 <beorn@grafana.com>
2022-10-28 07:58:40 -07:00
{F: 3600, T: 0},
{F: 3600, T: 60 * 1000},
{F: 3600, T: 2 * 60 * 1000},
{F: 3600, T: 3 * 60 * 1000},
{F: 3600, T: 4 * 60 * 1000},
{F: 3600, T: 5 * 60 * 1000},
{F: 3600, T: 6 * 60 * 1000},
{F: 3600, T: 7 * 60 * 1000},
},
PromQL engine: Delay deletion of __name__ label to the end of the query evaluation (#14477) PromQL engine: Delay deletion of __name__ label to the end of the query evaluation - This change allows optionally preserving the `__name__` label via the `label_replace` and `label_join` functions, and helps prevent the dreaded "vector cannot contain metrics with the same labelset" error. - The implementation extends the `Series` and `Sample` structs with a boolean flag indicating whether the `__name__` label should be deleted at the end of the query evaluation. - The `label_replace` and `label_join` functions can still access the value of the `__name__` label, even if it has been previously marked for deletion. If `__name__` is used as target label, it won't be dropped at the end of the query evaluation. - Fixes https://github.com/prometheus/prometheus/issues/11397 - See https://github.com/jcreixell/prometheus/pull/2 for previous discussion, including the decision to create this PR and benchmark it before considering other alternatives (like refactoring `labels.Labels`). - See https://github.com/jcreixell/prometheus/pull/1 for an alternative implementation using a special label instead of boolean flags. - Note: a feature flag `promql-delayed-name-removal` has been added as it changes the behavior of some "weird" queries (see https://github.com/prometheus/prometheus/issues/11397#issuecomment-1451998792) Example (this always fails, as `__name__` is being dropped by `count_over_time`): ``` count_over_time({__name__!=""}[1m]) => Error executing query: vector cannot contain metrics with the same labelset ``` Before: ``` label_replace(count_over_time({__name__!=""}[1m]), "__name__", "count_$1", "__name__", "(.+)") => Error executing query: vector cannot contain metrics with the same labelset ``` After: ``` label_replace(count_over_time({__name__!=""}[1m]), "__name__", "count_$1", "__name__", "(.+)") => count_go_gc_cycles_automatic_gc_cycles_total{instance="localhost:9090", job="prometheus"} 1 count_go_gc_cycles_forced_gc_cycles_total{instance="localhost:9090", job="prometheus"} 1 ... ``` Signed-off-by: Jorge Creixell <jcreixell@gmail.com> --------- Signed-off-by: Jorge Creixell <jcreixell@gmail.com> Signed-off-by: Björn Rabenstein <github@rabenste.in>
2024-08-29 06:50:39 -07:00
Metric: labels.EmptyLabels(),
DropName: true,
},
},
},
}
for _, c := range cases {
t.Run(c.query, func(t *testing.T) {
if c.interval == 0 {
c.interval = 1
}
start, end, interval := time.Unix(c.start, 0), time.Unix(c.end, 0), time.Duration(c.interval)*time.Second
var err error
var qry promql.Query
if c.end == 0 {
qry, err = engine.NewInstantQuery(context.Background(), storage, nil, c.query, start)
} else {
qry, err = engine.NewRangeQuery(context.Background(), storage, nil, c.query, start, end, interval)
}
require.NoError(t, err)
res := qry.Exec(context.Background())
require.NoError(t, res.Err)
if expMat, ok := c.result.(promql.Matrix); ok {
sort.Sort(expMat)
sort.Sort(res.Value.(promql.Matrix))
}
testutil.RequireEqual(t, c.result, res.Value, "query %q failed", c.query)
})
}
}
func TestSubquerySelector(t *testing.T) {
type caseType struct {
Query string
Result promql.Result
Start time.Time
}
for _, tst := range []struct {
loadString string
cases []caseType
}{
{
loadString: `load 10s
metric 1 2`,
cases: []caseType{
{
Query: "metric[20s:10s]",
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1, T: 0}, {F: 2, T: 10000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
nil,
},
Start: time.Unix(10, 0),
},
{
Query: "metric[20s:5s]",
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
nil,
},
Start: time.Unix(10, 0),
},
{
Query: "metric[20s:5s] offset 2s",
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
nil,
},
Start: time.Unix(12, 0),
},
{
Query: "metric[20s:5s] offset 6s",
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
nil,
},
Start: time.Unix(20, 0),
},
{
Query: "metric[20s:5s] offset 4s",
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
nil,
},
Start: time.Unix(35, 0),
},
{
Query: "metric[20s:5s] offset 5s",
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
nil,
},
Start: time.Unix(35, 0),
},
{
Query: "metric[20s:5s] offset 6s",
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
nil,
},
Start: time.Unix(35, 0),
},
{
Query: "metric[20s:5s] offset 7s",
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}},
Metric: labels.FromStrings("__name__", "metric"),
},
},
nil,
},
Start: time.Unix(35, 0),
},
},
},
{
loadString: `load 10s
http_requests{job="api-server", instance="0", group="production"} 0+10x1000 100+30x1000
http_requests{job="api-server", instance="1", group="production"} 0+20x1000 200+30x1000
http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300+80x1000
http_requests{job="api-server", instance="1", group="canary"} 0+40x2000`,
cases: []caseType{
{ // Normal selector.
Query: `http_requests{group=~"pro.*",instance="0"}[30s:10s]`,
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 10000, T: 10000000}, {F: 100, T: 10010000}, {F: 130, T: 10020000}},
Metric: labels.FromStrings("__name__", "http_requests", "job", "api-server", "instance", "0", "group", "production"),
},
},
nil,
},
Start: time.Unix(10020, 0),
},
{ // Normal selector. Add 1ms to the range to see the legacy behavior of the previous test.
Query: `http_requests{group=~"pro.*",instance="0"}[30s1ms:10s]`,
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 9990, T: 9990000}, {F: 10000, T: 10000000}, {F: 100, T: 10010000}, {F: 130, T: 10020000}},
Metric: labels.FromStrings("__name__", "http_requests", "job", "api-server", "instance", "0", "group", "production"),
},
},
nil,
},
Start: time.Unix(10020, 0),
},
{ // Default step.
Query: `http_requests{group=~"pro.*",instance="0"}[5m:]`,
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 9840, T: 9840000}, {F: 9900, T: 9900000}, {F: 9960, T: 9960000}, {F: 130, T: 10020000}, {F: 310, T: 10080000}},
Metric: labels.FromStrings("__name__", "http_requests", "job", "api-server", "instance", "0", "group", "production"),
},
},
nil,
},
Start: time.Unix(10100, 0),
},
{ // Checking if high offset (>LookbackDelta) is being taken care of.
Query: `http_requests{group=~"pro.*",instance="0"}[5m:] offset 20m`,
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 8640, T: 8640000}, {F: 8700, T: 8700000}, {F: 8760, T: 8760000}, {F: 8820, T: 8820000}, {F: 8880, T: 8880000}},
Metric: labels.FromStrings("__name__", "http_requests", "job", "api-server", "instance", "0", "group", "production"),
},
},
nil,
},
Start: time.Unix(10100, 0),
},
{
Query: `rate(http_requests[1m])[15s:5s]`,
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 3, T: 7990000}, {F: 3, T: 7995000}, {F: 3, T: 8000000}},
Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "canary"),
DropName: true,
},
promql.Series{
Floats: []promql.FPoint{{F: 4, T: 7990000}, {F: 4, T: 7995000}, {F: 4, T: 8000000}},
Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "canary"),
DropName: true,
},
promql.Series{
Floats: []promql.FPoint{{F: 1, T: 7990000}, {F: 1, T: 7995000}, {F: 1, T: 8000000}},
Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "production"),
DropName: true,
},
promql.Series{
Floats: []promql.FPoint{{F: 2, T: 7990000}, {F: 2, T: 7995000}, {F: 2, T: 8000000}},
Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "production"),
DropName: true,
},
},
nil,
},
Start: time.Unix(8000, 0),
},
{
Query: `rate(http_requests[1m])[15s1ms:5s]`, // Add 1ms to the range to see the legacy behavior of the previous test.
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
PromQL engine: Delay deletion of __name__ label to the end of the query evaluation (#14477) PromQL engine: Delay deletion of __name__ label to the end of the query evaluation - This change allows optionally preserving the `__name__` label via the `label_replace` and `label_join` functions, and helps prevent the dreaded "vector cannot contain metrics with the same labelset" error. - The implementation extends the `Series` and `Sample` structs with a boolean flag indicating whether the `__name__` label should be deleted at the end of the query evaluation. - The `label_replace` and `label_join` functions can still access the value of the `__name__` label, even if it has been previously marked for deletion. If `__name__` is used as target label, it won't be dropped at the end of the query evaluation. - Fixes https://github.com/prometheus/prometheus/issues/11397 - See https://github.com/jcreixell/prometheus/pull/2 for previous discussion, including the decision to create this PR and benchmark it before considering other alternatives (like refactoring `labels.Labels`). - See https://github.com/jcreixell/prometheus/pull/1 for an alternative implementation using a special label instead of boolean flags. - Note: a feature flag `promql-delayed-name-removal` has been added as it changes the behavior of some "weird" queries (see https://github.com/prometheus/prometheus/issues/11397#issuecomment-1451998792) Example (this always fails, as `__name__` is being dropped by `count_over_time`): ``` count_over_time({__name__!=""}[1m]) => Error executing query: vector cannot contain metrics with the same labelset ``` Before: ``` label_replace(count_over_time({__name__!=""}[1m]), "__name__", "count_$1", "__name__", "(.+)") => Error executing query: vector cannot contain metrics with the same labelset ``` After: ``` label_replace(count_over_time({__name__!=""}[1m]), "__name__", "count_$1", "__name__", "(.+)") => count_go_gc_cycles_automatic_gc_cycles_total{instance="localhost:9090", job="prometheus"} 1 count_go_gc_cycles_forced_gc_cycles_total{instance="localhost:9090", job="prometheus"} 1 ... ``` Signed-off-by: Jorge Creixell <jcreixell@gmail.com> --------- Signed-off-by: Jorge Creixell <jcreixell@gmail.com> Signed-off-by: Björn Rabenstein <github@rabenste.in>
2024-08-29 06:50:39 -07:00
Floats: []promql.FPoint{{F: 3, T: 7985000}, {F: 3, T: 7990000}, {F: 3, T: 7995000}, {F: 3, T: 8000000}},
Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "canary"),
DropName: true,
},
promql.Series{
PromQL engine: Delay deletion of __name__ label to the end of the query evaluation (#14477) PromQL engine: Delay deletion of __name__ label to the end of the query evaluation - This change allows optionally preserving the `__name__` label via the `label_replace` and `label_join` functions, and helps prevent the dreaded "vector cannot contain metrics with the same labelset" error. - The implementation extends the `Series` and `Sample` structs with a boolean flag indicating whether the `__name__` label should be deleted at the end of the query evaluation. - The `label_replace` and `label_join` functions can still access the value of the `__name__` label, even if it has been previously marked for deletion. If `__name__` is used as target label, it won't be dropped at the end of the query evaluation. - Fixes https://github.com/prometheus/prometheus/issues/11397 - See https://github.com/jcreixell/prometheus/pull/2 for previous discussion, including the decision to create this PR and benchmark it before considering other alternatives (like refactoring `labels.Labels`). - See https://github.com/jcreixell/prometheus/pull/1 for an alternative implementation using a special label instead of boolean flags. - Note: a feature flag `promql-delayed-name-removal` has been added as it changes the behavior of some "weird" queries (see https://github.com/prometheus/prometheus/issues/11397#issuecomment-1451998792) Example (this always fails, as `__name__` is being dropped by `count_over_time`): ``` count_over_time({__name__!=""}[1m]) => Error executing query: vector cannot contain metrics with the same labelset ``` Before: ``` label_replace(count_over_time({__name__!=""}[1m]), "__name__", "count_$1", "__name__", "(.+)") => Error executing query: vector cannot contain metrics with the same labelset ``` After: ``` label_replace(count_over_time({__name__!=""}[1m]), "__name__", "count_$1", "__name__", "(.+)") => count_go_gc_cycles_automatic_gc_cycles_total{instance="localhost:9090", job="prometheus"} 1 count_go_gc_cycles_forced_gc_cycles_total{instance="localhost:9090", job="prometheus"} 1 ... ``` Signed-off-by: Jorge Creixell <jcreixell@gmail.com> --------- Signed-off-by: Jorge Creixell <jcreixell@gmail.com> Signed-off-by: Björn Rabenstein <github@rabenste.in>
2024-08-29 06:50:39 -07:00
Floats: []promql.FPoint{{F: 4, T: 7985000}, {F: 4, T: 7990000}, {F: 4, T: 7995000}, {F: 4, T: 8000000}},
Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "canary"),
DropName: true,
},
promql.Series{
PromQL engine: Delay deletion of __name__ label to the end of the query evaluation (#14477) PromQL engine: Delay deletion of __name__ label to the end of the query evaluation - This change allows optionally preserving the `__name__` label via the `label_replace` and `label_join` functions, and helps prevent the dreaded "vector cannot contain metrics with the same labelset" error. - The implementation extends the `Series` and `Sample` structs with a boolean flag indicating whether the `__name__` label should be deleted at the end of the query evaluation. - The `label_replace` and `label_join` functions can still access the value of the `__name__` label, even if it has been previously marked for deletion. If `__name__` is used as target label, it won't be dropped at the end of the query evaluation. - Fixes https://github.com/prometheus/prometheus/issues/11397 - See https://github.com/jcreixell/prometheus/pull/2 for previous discussion, including the decision to create this PR and benchmark it before considering other alternatives (like refactoring `labels.Labels`). - See https://github.com/jcreixell/prometheus/pull/1 for an alternative implementation using a special label instead of boolean flags. - Note: a feature flag `promql-delayed-name-removal` has been added as it changes the behavior of some "weird" queries (see https://github.com/prometheus/prometheus/issues/11397#issuecomment-1451998792) Example (this always fails, as `__name__` is being dropped by `count_over_time`): ``` count_over_time({__name__!=""}[1m]) => Error executing query: vector cannot contain metrics with the same labelset ``` Before: ``` label_replace(count_over_time({__name__!=""}[1m]), "__name__", "count_$1", "__name__", "(.+)") => Error executing query: vector cannot contain metrics with the same labelset ``` After: ``` label_replace(count_over_time({__name__!=""}[1m]), "__name__", "count_$1", "__name__", "(.+)") => count_go_gc_cycles_automatic_gc_cycles_total{instance="localhost:9090", job="prometheus"} 1 count_go_gc_cycles_forced_gc_cycles_total{instance="localhost:9090", job="prometheus"} 1 ... ``` Signed-off-by: Jorge Creixell <jcreixell@gmail.com> --------- Signed-off-by: Jorge Creixell <jcreixell@gmail.com> Signed-off-by: Björn Rabenstein <github@rabenste.in>
2024-08-29 06:50:39 -07:00
Floats: []promql.FPoint{{F: 1, T: 7985000}, {F: 1, T: 7990000}, {F: 1, T: 7995000}, {F: 1, T: 8000000}},
Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "production"),
DropName: true,
},
promql.Series{
PromQL engine: Delay deletion of __name__ label to the end of the query evaluation (#14477) PromQL engine: Delay deletion of __name__ label to the end of the query evaluation - This change allows optionally preserving the `__name__` label via the `label_replace` and `label_join` functions, and helps prevent the dreaded "vector cannot contain metrics with the same labelset" error. - The implementation extends the `Series` and `Sample` structs with a boolean flag indicating whether the `__name__` label should be deleted at the end of the query evaluation. - The `label_replace` and `label_join` functions can still access the value of the `__name__` label, even if it has been previously marked for deletion. If `__name__` is used as target label, it won't be dropped at the end of the query evaluation. - Fixes https://github.com/prometheus/prometheus/issues/11397 - See https://github.com/jcreixell/prometheus/pull/2 for previous discussion, including the decision to create this PR and benchmark it before considering other alternatives (like refactoring `labels.Labels`). - See https://github.com/jcreixell/prometheus/pull/1 for an alternative implementation using a special label instead of boolean flags. - Note: a feature flag `promql-delayed-name-removal` has been added as it changes the behavior of some "weird" queries (see https://github.com/prometheus/prometheus/issues/11397#issuecomment-1451998792) Example (this always fails, as `__name__` is being dropped by `count_over_time`): ``` count_over_time({__name__!=""}[1m]) => Error executing query: vector cannot contain metrics with the same labelset ``` Before: ``` label_replace(count_over_time({__name__!=""}[1m]), "__name__", "count_$1", "__name__", "(.+)") => Error executing query: vector cannot contain metrics with the same labelset ``` After: ``` label_replace(count_over_time({__name__!=""}[1m]), "__name__", "count_$1", "__name__", "(.+)") => count_go_gc_cycles_automatic_gc_cycles_total{instance="localhost:9090", job="prometheus"} 1 count_go_gc_cycles_forced_gc_cycles_total{instance="localhost:9090", job="prometheus"} 1 ... ``` Signed-off-by: Jorge Creixell <jcreixell@gmail.com> --------- Signed-off-by: Jorge Creixell <jcreixell@gmail.com> Signed-off-by: Björn Rabenstein <github@rabenste.in>
2024-08-29 06:50:39 -07:00
Floats: []promql.FPoint{{F: 2, T: 7985000}, {F: 2, T: 7990000}, {F: 2, T: 7995000}, {F: 2, T: 8000000}},
Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "production"),
DropName: true,
},
},
nil,
},
Start: time.Unix(8000, 0),
},
{
Query: `sum(http_requests{group=~"pro.*"})[30s:10s]`,
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 300, T: 100000}, {F: 330, T: 110000}, {F: 360, T: 120000}},
Metric: labels.EmptyLabels(),
},
},
nil,
},
Start: time.Unix(120, 0),
},
{
Query: `sum(http_requests{group=~"pro.*"})[30s:10s]`,
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 300, T: 100000}, {F: 330, T: 110000}, {F: 360, T: 120000}},
Metric: labels.EmptyLabels(),
},
},
nil,
},
Start: time.Unix(121, 0), // 1s later doesn't change the result.
},
{
// Add 1ms to the range to see the legacy behavior of the previous test.
Query: `sum(http_requests{group=~"pro.*"})[30s1ms:10s]`,
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 270, T: 90000}, {F: 300, T: 100000}, {F: 330, T: 110000}, {F: 360, T: 120000}},
Metric: labels.EmptyLabels(),
},
},
nil,
},
Start: time.Unix(120, 0),
},
{
Query: `sum(http_requests)[40s:10s]`,
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 900, T: 90000}, {F: 1000, T: 100000}, {F: 1100, T: 110000}, {F: 1200, T: 120000}},
Metric: labels.EmptyLabels(),
},
},
nil,
},
Start: time.Unix(120, 0),
},
{
Query: `sum(http_requests)[40s1ms:10s]`, // Add 1ms to the range to see the legacy behavior of the previous test.
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 800, T: 80000}, {F: 900, T: 90000}, {F: 1000, T: 100000}, {F: 1100, T: 110000}, {F: 1200, T: 120000}},
Metric: labels.EmptyLabels(),
},
},
nil,
},
Start: time.Unix(120, 0),
},
{
Query: `(sum(http_requests{group=~"p.*"})+sum(http_requests{group=~"c.*"}))[20s:5s]`,
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1000, T: 105000}, {F: 1100, T: 110000}, {F: 1100, T: 115000}, {F: 1200, T: 120000}},
Metric: labels.EmptyLabels(),
},
},
nil,
},
Start: time.Unix(120, 0),
},
{
// Add 1ms to the range to see the legacy behavior of the previous test.
Query: `(sum(http_requests{group=~"p.*"})+sum(http_requests{group=~"c.*"}))[20s1ms:5s]`,
Result: promql.Result{
nil,
promql.Matrix{
promql.Series{
Floats: []promql.FPoint{{F: 1000, T: 100000}, {F: 1000, T: 105000}, {F: 1100, T: 110000}, {F: 1100, T: 115000}, {F: 1200, T: 120000}},
Metric: labels.EmptyLabels(),
},
},
nil,
},
Start: time.Unix(120, 0),
},
},
},
} {
promql: Removed global and add ability to have better interval for subqueries if not specified (#7628) * promql: Removed global and add ability to have better interval for subqueries if not specified ## Changes * Refactored tests for better hints testing * Added various TODO in places to enhance. * Moved DefaultEvalInterval global to opts with func(rangeMillis int64) int64 function instead Motivation: At Thanos we would love to have better control over the subqueries step/interval. This is important to choose proper resolution. I think having proper step also does not harm for Prometheus and remote read users. Especially on stateless querier we do not know evaluation interval and in fact putting global can be wrong to assume for Prometheus even. I think ideally we could try to have at least 3 samples within the range, the same way Prometheus UI and Grafana assumes. Anyway this interfaces allows to decide on promQL user basis. Open question: Is taking parent interval a smart move? Motivation for removing global: I spent 1h fighting with: === RUN TestEvaluations TestEvaluations: promql_test.go:31: unexpected error: error evaluating query "absent_over_time(rate(nonexistant[5m])[5m:])" (line 687): unexpected error: runtime error: integer divide by zero --- FAIL: TestEvaluations (0.32s) FAIL At the end I found that this fails on most of the versions including this master if you run this test alone. If run together with many other tests it passes. This is due to SetDefaultEvaluationInterval(1 * time.Minute) in test that is ran before TestEvaluations. Thanks to globals (: Let's fix it by dropping this global. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Added issue links for TODOs. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Removed irrelevant changes. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
2020-07-22 06:39:51 -07:00
t.Run("", func(t *testing.T) {
engine := newTestEngine(t)
storage := promqltest.LoadedStorage(t, tst.loadString)
t.Cleanup(func() { storage.Close() })
promql: Removed global and add ability to have better interval for subqueries if not specified (#7628) * promql: Removed global and add ability to have better interval for subqueries if not specified ## Changes * Refactored tests for better hints testing * Added various TODO in places to enhance. * Moved DefaultEvalInterval global to opts with func(rangeMillis int64) int64 function instead Motivation: At Thanos we would love to have better control over the subqueries step/interval. This is important to choose proper resolution. I think having proper step also does not harm for Prometheus and remote read users. Especially on stateless querier we do not know evaluation interval and in fact putting global can be wrong to assume for Prometheus even. I think ideally we could try to have at least 3 samples within the range, the same way Prometheus UI and Grafana assumes. Anyway this interfaces allows to decide on promQL user basis. Open question: Is taking parent interval a smart move? Motivation for removing global: I spent 1h fighting with: === RUN TestEvaluations TestEvaluations: promql_test.go:31: unexpected error: error evaluating query "absent_over_time(rate(nonexistant[5m])[5m:])" (line 687): unexpected error: runtime error: integer divide by zero --- FAIL: TestEvaluations (0.32s) FAIL At the end I found that this fails on most of the versions including this master if you run this test alone. If run together with many other tests it passes. This is due to SetDefaultEvaluationInterval(1 * time.Minute) in test that is ran before TestEvaluations. Thanks to globals (: Let's fix it by dropping this global. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Added issue links for TODOs. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Removed irrelevant changes. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
2020-07-22 06:39:51 -07:00
for _, c := range tst.cases {
t.Run(c.Query, func(t *testing.T) {
qry, err := engine.NewInstantQuery(context.Background(), storage, nil, c.Query, c.Start)
require.NoError(t, err)
promql: Removed global and add ability to have better interval for subqueries if not specified (#7628) * promql: Removed global and add ability to have better interval for subqueries if not specified ## Changes * Refactored tests for better hints testing * Added various TODO in places to enhance. * Moved DefaultEvalInterval global to opts with func(rangeMillis int64) int64 function instead Motivation: At Thanos we would love to have better control over the subqueries step/interval. This is important to choose proper resolution. I think having proper step also does not harm for Prometheus and remote read users. Especially on stateless querier we do not know evaluation interval and in fact putting global can be wrong to assume for Prometheus even. I think ideally we could try to have at least 3 samples within the range, the same way Prometheus UI and Grafana assumes. Anyway this interfaces allows to decide on promQL user basis. Open question: Is taking parent interval a smart move? Motivation for removing global: I spent 1h fighting with: === RUN TestEvaluations TestEvaluations: promql_test.go:31: unexpected error: error evaluating query "absent_over_time(rate(nonexistant[5m])[5m:])" (line 687): unexpected error: runtime error: integer divide by zero --- FAIL: TestEvaluations (0.32s) FAIL At the end I found that this fails on most of the versions including this master if you run this test alone. If run together with many other tests it passes. This is due to SetDefaultEvaluationInterval(1 * time.Minute) in test that is ran before TestEvaluations. Thanks to globals (: Let's fix it by dropping this global. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Added issue links for TODOs. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Removed irrelevant changes. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
2020-07-22 06:39:51 -07:00
res := qry.Exec(context.Background())
require.Equal(t, c.Result.Err, res.Err)
mat := res.Value.(promql.Matrix)
promql: Removed global and add ability to have better interval for subqueries if not specified (#7628) * promql: Removed global and add ability to have better interval for subqueries if not specified ## Changes * Refactored tests for better hints testing * Added various TODO in places to enhance. * Moved DefaultEvalInterval global to opts with func(rangeMillis int64) int64 function instead Motivation: At Thanos we would love to have better control over the subqueries step/interval. This is important to choose proper resolution. I think having proper step also does not harm for Prometheus and remote read users. Especially on stateless querier we do not know evaluation interval and in fact putting global can be wrong to assume for Prometheus even. I think ideally we could try to have at least 3 samples within the range, the same way Prometheus UI and Grafana assumes. Anyway this interfaces allows to decide on promQL user basis. Open question: Is taking parent interval a smart move? Motivation for removing global: I spent 1h fighting with: === RUN TestEvaluations TestEvaluations: promql_test.go:31: unexpected error: error evaluating query "absent_over_time(rate(nonexistant[5m])[5m:])" (line 687): unexpected error: runtime error: integer divide by zero --- FAIL: TestEvaluations (0.32s) FAIL At the end I found that this fails on most of the versions including this master if you run this test alone. If run together with many other tests it passes. This is due to SetDefaultEvaluationInterval(1 * time.Minute) in test that is ran before TestEvaluations. Thanks to globals (: Let's fix it by dropping this global. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Added issue links for TODOs. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Removed irrelevant changes. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
2020-07-22 06:39:51 -07:00
sort.Sort(mat)
testutil.RequireEqual(t, c.Result.Value, mat)
promql: Removed global and add ability to have better interval for subqueries if not specified (#7628) * promql: Removed global and add ability to have better interval for subqueries if not specified ## Changes * Refactored tests for better hints testing * Added various TODO in places to enhance. * Moved DefaultEvalInterval global to opts with func(rangeMillis int64) int64 function instead Motivation: At Thanos we would love to have better control over the subqueries step/interval. This is important to choose proper resolution. I think having proper step also does not harm for Prometheus and remote read users. Especially on stateless querier we do not know evaluation interval and in fact putting global can be wrong to assume for Prometheus even. I think ideally we could try to have at least 3 samples within the range, the same way Prometheus UI and Grafana assumes. Anyway this interfaces allows to decide on promQL user basis. Open question: Is taking parent interval a smart move? Motivation for removing global: I spent 1h fighting with: === RUN TestEvaluations TestEvaluations: promql_test.go:31: unexpected error: error evaluating query "absent_over_time(rate(nonexistant[5m])[5m:])" (line 687): unexpected error: runtime error: integer divide by zero --- FAIL: TestEvaluations (0.32s) FAIL At the end I found that this fails on most of the versions including this master if you run this test alone. If run together with many other tests it passes. This is due to SetDefaultEvaluationInterval(1 * time.Minute) in test that is ran before TestEvaluations. Thanks to globals (: Let's fix it by dropping this global. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Added issue links for TODOs. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> * Removed irrelevant changes. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com>
2020-07-22 06:39:51 -07:00
})
}
})
}
}
type FakeQueryLogger struct {
closed bool
logs []interface{}
attrs []any
}
func NewFakeQueryLogger() *FakeQueryLogger {
return &FakeQueryLogger{
closed: false,
logs: make([]interface{}, 0),
attrs: make([]any, 0),
}
}
// It implements the promql.QueryLogger interface.
func (f *FakeQueryLogger) Close() error {
f.closed = true
return nil
}
// It implements the promql.QueryLogger interface.
fix!: stop unbounded memory usage from query log Resolves: #15433 When I converted prometheus to use slog in #14906, I update both the `QueryLogger` interface, as well as how the log calls to the `QueryLogger` were built up in `promql.Engine.exec()`. The backing logger for the `QueryLogger` in the engine is a `util/logging.JSONFileLogger`, and it's implementation of the `With()` method updates the logger the logger in place with the new keyvals added onto the underlying slog.Logger, which means they get inherited onto everything after. All subsequent calls to `With()`, even in later queries, would continue to then append on more and more keyvals for the various params and fields built up in the logger. In turn, this causes unbounded growth of the logger, leading to increased memory usage, and in at least one report was the likely cause of an OOM kill. More information can be found in the issue and the linked slack thread. This commit does a few things: - It was referenced in feedback in #14906 that it would've been better to not change the `QueryLogger` interface if possible, this PR proposes changes that bring it closer to alignment with the pre-3.0 `QueryLogger` interface contract - reverts `promql.Engine.exec()`'s usage of the query logger to the pattern of building up an array of args to pass at once to the end log call. Avoiding the repetitious calls to `.With()` are what resolve the issue with the logger growth/memory usage. - updates the scrape failure logger to use the update `QueryLogger` methods in the contract. - updates tests accordingly - cleans up unused methods Builds and passes tests successfully. Tested locally and confirmed I could no longer reproduce the issue/it resolved the issue. Signed-off-by: TJ Hoplock <t.hoplock@gmail.com>
2024-11-23 11:20:37 -08:00
func (f *FakeQueryLogger) Log(ctx context.Context, level slog.Level, msg string, args ...any) {
// Test usage only really cares about existence of keyvals passed in
// via args, just append in the log message before handling the
// provided args and any embedded kvs added via `.With()` on f.attrs.
log := append([]any{msg}, args...)
log = append(log, f.attrs...)
f.attrs = f.attrs[:0]
f.logs = append(f.logs, log...)
}
// It implements the promql.QueryLogger interface.
func (f *FakeQueryLogger) With(args ...any) {
f.attrs = append(f.attrs, args...)
}
func TestQueryLogger_basic(t *testing.T) {
opts := promql.EngineOpts{
Logger: nil,
Reg: nil,
MaxSamples: 10,
Timeout: 10 * time.Second,
}
engine := promqltest.NewTestEngineWithOpts(t, opts)
queryExec := func() {
ctx, cancelCtx := context.WithCancel(context.Background())
defer cancelCtx()
query := engine.NewTestQuery(func(ctx context.Context) error {
return contextDone(ctx, "test statement execution")
})
res := query.Exec(ctx)
require.NoError(t, res.Err)
}
// promql.Query works without query log initialized.
queryExec()
f1 := NewFakeQueryLogger()
engine.SetQueryLogger(f1)
queryExec()
require.Contains(t, f1.logs, `params`)
require.Contains(t, f1.logs, map[string]interface{}{"query": "test statement"})
l := len(f1.logs)
queryExec()
require.Len(t, f1.logs, 2*l)
// Test that we close the query logger when unsetting it.
require.False(t, f1.closed, "expected f1 to be open, got closed")
engine.SetQueryLogger(nil)
require.True(t, f1.closed, "expected f1 to be closed, got open")
queryExec()
// Test that we close the query logger when swapping.
f2 := NewFakeQueryLogger()
f3 := NewFakeQueryLogger()
engine.SetQueryLogger(f2)
require.False(t, f2.closed, "expected f2 to be open, got closed")
queryExec()
engine.SetQueryLogger(f3)
require.True(t, f2.closed, "expected f2 to be closed, got open")
require.False(t, f3.closed, "expected f3 to be open, got closed")
queryExec()
}
func TestQueryLogger_fields(t *testing.T) {
opts := promql.EngineOpts{
Logger: nil,
Reg: nil,
MaxSamples: 10,
Timeout: 10 * time.Second,
}
engine := promqltest.NewTestEngineWithOpts(t, opts)
f1 := NewFakeQueryLogger()
engine.SetQueryLogger(f1)
ctx, cancelCtx := context.WithCancel(context.Background())
ctx = promql.NewOriginContext(ctx, map[string]interface{}{"foo": "bar"})
defer cancelCtx()
query := engine.NewTestQuery(func(ctx context.Context) error {
return contextDone(ctx, "test statement execution")
})
res := query.Exec(ctx)
require.NoError(t, res.Err)
require.Contains(t, f1.logs, `foo`)
require.Contains(t, f1.logs, `bar`)
}
func TestQueryLogger_error(t *testing.T) {
opts := promql.EngineOpts{
Logger: nil,
Reg: nil,
MaxSamples: 10,
Timeout: 10 * time.Second,
}
engine := promqltest.NewTestEngineWithOpts(t, opts)
f1 := NewFakeQueryLogger()
engine.SetQueryLogger(f1)
ctx, cancelCtx := context.WithCancel(context.Background())
ctx = promql.NewOriginContext(ctx, map[string]interface{}{"foo": "bar"})
defer cancelCtx()
testErr := errors.New("failure")
query := engine.NewTestQuery(func(ctx context.Context) error {
return testErr
})
res := query.Exec(ctx)
require.Error(t, res.Err, "query should have failed")
require.Contains(t, f1.logs, `params`)
require.Contains(t, f1.logs, map[string]interface{}{"query": "test statement"})
require.Contains(t, f1.logs, `error`)
require.Contains(t, f1.logs, testErr)
}
func TestPreprocessAndWrapWithStepInvariantExpr(t *testing.T) {
startTime := time.Unix(1000, 0)
endTime := time.Unix(9999, 0)
testCases := []struct {
input string // The input to be parsed.
expected parser.Expr // The expected expression AST.
outputTest bool
}{
{
input: "123.4567",
expected: &parser.StepInvariantExpr{
Expr: &parser.NumberLiteral{
Val: 123.4567,
PosRange: posrange.PositionRange{Start: 0, End: 8},
},
},
},
{
input: `"foo"`,
expected: &parser.StepInvariantExpr{
Expr: &parser.StringLiteral{
Val: "foo",
PosRange: posrange.PositionRange{Start: 0, End: 5},
},
},
},
{
input: "foo * bar",
expected: &parser.BinaryExpr{
Op: parser.MUL,
LHS: &parser.VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "foo"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 3,
},
},
RHS: &parser.VectorSelector{
Name: "bar",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "bar"),
},
PosRange: posrange.PositionRange{
Start: 6,
End: 9,
},
},
VectorMatching: &parser.VectorMatching{Card: parser.CardOneToOne},
},
},
{
input: "foo * bar @ 10",
expected: &parser.BinaryExpr{
Op: parser.MUL,
LHS: &parser.VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "foo"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 3,
},
},
RHS: &parser.StepInvariantExpr{
Expr: &parser.VectorSelector{
Name: "bar",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "bar"),
},
PosRange: posrange.PositionRange{
Start: 6,
End: 14,
},
Timestamp: makeInt64Pointer(10000),
},
},
VectorMatching: &parser.VectorMatching{Card: parser.CardOneToOne},
},
},
{
input: "foo @ 20 * bar @ 10",
expected: &parser.StepInvariantExpr{
Expr: &parser.BinaryExpr{
Op: parser.MUL,
LHS: &parser.VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "foo"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 8,
},
Timestamp: makeInt64Pointer(20000),
},
RHS: &parser.VectorSelector{
Name: "bar",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "bar"),
},
PosRange: posrange.PositionRange{
Start: 11,
End: 19,
},
Timestamp: makeInt64Pointer(10000),
},
VectorMatching: &parser.VectorMatching{Card: parser.CardOneToOne},
},
},
},
{
input: "test[5s]",
expected: &parser.MatrixSelector{
VectorSelector: &parser.VectorSelector{
Name: "test",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "test"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 4,
},
},
Range: 5 * time.Second,
EndPos: 8,
},
},
{
input: `test{a="b"}[5y] @ 1603774699`,
expected: &parser.StepInvariantExpr{
Expr: &parser.MatrixSelector{
VectorSelector: &parser.VectorSelector{
Name: "test",
Timestamp: makeInt64Pointer(1603774699000),
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "a", "b"),
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "test"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 11,
},
},
Range: 5 * 365 * 24 * time.Hour,
EndPos: 28,
},
},
},
{
input: "sum by (foo)(some_metric)",
expected: &parser.AggregateExpr{
Op: parser.SUM,
Expr: &parser.VectorSelector{
Name: "some_metric",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric"),
},
PosRange: posrange.PositionRange{
Start: 13,
End: 24,
},
},
Grouping: []string{"foo"},
PosRange: posrange.PositionRange{
Start: 0,
End: 25,
},
},
},
{
input: "sum by (foo)(some_metric @ 10)",
expected: &parser.StepInvariantExpr{
Expr: &parser.AggregateExpr{
Op: parser.SUM,
Expr: &parser.VectorSelector{
Name: "some_metric",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric"),
},
PosRange: posrange.PositionRange{
Start: 13,
End: 29,
},
Timestamp: makeInt64Pointer(10000),
},
Grouping: []string{"foo"},
PosRange: posrange.PositionRange{
Start: 0,
End: 30,
},
},
},
},
{
input: "sum(some_metric1 @ 10) + sum(some_metric2 @ 20)",
expected: &parser.StepInvariantExpr{
Expr: &parser.BinaryExpr{
Op: parser.ADD,
VectorMatching: &parser.VectorMatching{},
LHS: &parser.AggregateExpr{
Op: parser.SUM,
Expr: &parser.VectorSelector{
Name: "some_metric1",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric1"),
},
PosRange: posrange.PositionRange{
Start: 4,
End: 21,
},
Timestamp: makeInt64Pointer(10000),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 22,
},
},
RHS: &parser.AggregateExpr{
Op: parser.SUM,
Expr: &parser.VectorSelector{
Name: "some_metric2",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric2"),
},
PosRange: posrange.PositionRange{
Start: 29,
End: 46,
},
Timestamp: makeInt64Pointer(20000),
},
PosRange: posrange.PositionRange{
Start: 25,
End: 47,
},
},
},
},
},
{
input: "some_metric and topk(5, rate(some_metric[1m] @ 20))",
expected: &parser.BinaryExpr{
Op: parser.LAND,
VectorMatching: &parser.VectorMatching{
Card: parser.CardManyToMany,
},
LHS: &parser.VectorSelector{
Name: "some_metric",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 11,
},
},
RHS: &parser.StepInvariantExpr{
Expr: &parser.AggregateExpr{
Op: parser.TOPK,
Expr: &parser.Call{
Func: parser.MustGetFunction("rate"),
Args: parser.Expressions{
&parser.MatrixSelector{
VectorSelector: &parser.VectorSelector{
Name: "some_metric",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric"),
},
PosRange: posrange.PositionRange{
Start: 29,
End: 40,
},
Timestamp: makeInt64Pointer(20000),
},
Range: 1 * time.Minute,
EndPos: 49,
},
},
PosRange: posrange.PositionRange{
Start: 24,
End: 50,
},
},
Param: &parser.NumberLiteral{
Val: 5,
PosRange: posrange.PositionRange{
Start: 21,
End: 22,
},
},
PosRange: posrange.PositionRange{
Start: 16,
End: 51,
},
},
},
},
},
{
input: "time()",
expected: &parser.Call{
Func: parser.MustGetFunction("time"),
Args: parser.Expressions{},
PosRange: posrange.PositionRange{
Start: 0,
End: 6,
},
},
},
{
input: `foo{bar="baz"}[10m:6s]`,
expected: &parser.SubqueryExpr{
Expr: &parser.VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "bar", "baz"),
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "foo"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 14,
},
},
Range: 10 * time.Minute,
Step: 6 * time.Second,
EndPos: 22,
},
},
{
input: `foo{bar="baz"}[10m:6s] @ 10`,
expected: &parser.StepInvariantExpr{
Expr: &parser.SubqueryExpr{
Expr: &parser.VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "bar", "baz"),
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "foo"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 14,
},
},
Range: 10 * time.Minute,
Step: 6 * time.Second,
Timestamp: makeInt64Pointer(10000),
EndPos: 27,
},
},
},
{ // Even though the subquery is step invariant, the inside is also wrapped separately.
input: `sum(foo{bar="baz"} @ 20)[10m:6s] @ 10`,
expected: &parser.StepInvariantExpr{
Expr: &parser.SubqueryExpr{
Expr: &parser.StepInvariantExpr{
Expr: &parser.AggregateExpr{
Op: parser.SUM,
Expr: &parser.VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "bar", "baz"),
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "foo"),
},
PosRange: posrange.PositionRange{
Start: 4,
End: 23,
},
Timestamp: makeInt64Pointer(20000),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 24,
},
},
},
Range: 10 * time.Minute,
Step: 6 * time.Second,
Timestamp: makeInt64Pointer(10000),
EndPos: 37,
},
},
},
{
input: `min_over_time(rate(foo{bar="baz"}[2s])[5m:] @ 1603775091)[4m:3s]`,
expected: &parser.SubqueryExpr{
Expr: &parser.StepInvariantExpr{
Expr: &parser.Call{
Func: parser.MustGetFunction("min_over_time"),
Args: parser.Expressions{
&parser.SubqueryExpr{
Expr: &parser.Call{
Func: parser.MustGetFunction("rate"),
Args: parser.Expressions{
&parser.MatrixSelector{
VectorSelector: &parser.VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "bar", "baz"),
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "foo"),
},
PosRange: posrange.PositionRange{
Start: 19,
End: 33,
},
},
Range: 2 * time.Second,
EndPos: 37,
},
},
PosRange: posrange.PositionRange{
Start: 14,
End: 38,
},
},
Range: 5 * time.Minute,
Timestamp: makeInt64Pointer(1603775091000),
EndPos: 56,
},
},
PosRange: posrange.PositionRange{
Start: 0,
End: 57,
},
},
},
Range: 4 * time.Minute,
Step: 3 * time.Second,
EndPos: 64,
},
},
{
input: `some_metric @ 123 offset 1m [10m:5s]`,
expected: &parser.SubqueryExpr{
Expr: &parser.StepInvariantExpr{
Expr: &parser.VectorSelector{
Name: "some_metric",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 27,
},
Timestamp: makeInt64Pointer(123000),
OriginalOffset: 1 * time.Minute,
},
},
Range: 10 * time.Minute,
Step: 5 * time.Second,
EndPos: 36,
},
},
{
input: `some_metric[10m:5s] offset 1m @ 123`,
expected: &parser.StepInvariantExpr{
Expr: &parser.SubqueryExpr{
Expr: &parser.VectorSelector{
Name: "some_metric",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 11,
},
},
Timestamp: makeInt64Pointer(123000),
OriginalOffset: 1 * time.Minute,
Range: 10 * time.Minute,
Step: 5 * time.Second,
EndPos: 35,
},
},
},
{
input: `(foo + bar{nm="val"} @ 1234)[5m:] @ 1603775019`,
expected: &parser.StepInvariantExpr{
Expr: &parser.SubqueryExpr{
Expr: &parser.ParenExpr{
Expr: &parser.BinaryExpr{
Op: parser.ADD,
VectorMatching: &parser.VectorMatching{
Card: parser.CardOneToOne,
},
LHS: &parser.VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "foo"),
},
PosRange: posrange.PositionRange{
Start: 1,
End: 4,
},
},
RHS: &parser.StepInvariantExpr{
Expr: &parser.VectorSelector{
Name: "bar",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "nm", "val"),
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "bar"),
},
Timestamp: makeInt64Pointer(1234000),
PosRange: posrange.PositionRange{
Start: 7,
End: 27,
},
},
},
},
PosRange: posrange.PositionRange{
Start: 0,
End: 28,
},
},
Range: 5 * time.Minute,
Timestamp: makeInt64Pointer(1603775019000),
EndPos: 46,
},
},
},
{
input: "abs(abs(metric @ 10))",
expected: &parser.StepInvariantExpr{
Expr: &parser.Call{
Func: &parser.Function{
Name: "abs",
ArgTypes: []parser.ValueType{parser.ValueTypeVector},
ReturnType: parser.ValueTypeVector,
},
Args: parser.Expressions{&parser.Call{
Func: &parser.Function{
Name: "abs",
ArgTypes: []parser.ValueType{parser.ValueTypeVector},
ReturnType: parser.ValueTypeVector,
},
Args: parser.Expressions{&parser.VectorSelector{
Name: "metric",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "metric"),
},
PosRange: posrange.PositionRange{
Start: 8,
End: 19,
},
Timestamp: makeInt64Pointer(10000),
}},
PosRange: posrange.PositionRange{
Start: 4,
End: 20,
},
}},
PosRange: posrange.PositionRange{
Start: 0,
End: 21,
},
},
},
},
{
input: "sum(sum(some_metric1 @ 10) + sum(some_metric2 @ 20))",
expected: &parser.StepInvariantExpr{
Expr: &parser.AggregateExpr{
Op: parser.SUM,
Expr: &parser.BinaryExpr{
Op: parser.ADD,
VectorMatching: &parser.VectorMatching{},
LHS: &parser.AggregateExpr{
Op: parser.SUM,
Expr: &parser.VectorSelector{
Name: "some_metric1",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric1"),
},
PosRange: posrange.PositionRange{
Start: 8,
End: 25,
},
Timestamp: makeInt64Pointer(10000),
},
PosRange: posrange.PositionRange{
Start: 4,
End: 26,
},
},
RHS: &parser.AggregateExpr{
Op: parser.SUM,
Expr: &parser.VectorSelector{
Name: "some_metric2",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric2"),
},
PosRange: posrange.PositionRange{
Start: 33,
End: 50,
},
Timestamp: makeInt64Pointer(20000),
},
PosRange: posrange.PositionRange{
Start: 29,
End: 52,
},
},
},
PosRange: posrange.PositionRange{
Start: 0,
End: 52,
},
},
},
},
{
input: `foo @ start()`,
expected: &parser.StepInvariantExpr{
Expr: &parser.VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "foo"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 13,
},
Timestamp: makeInt64Pointer(timestamp.FromTime(startTime)),
StartOrEnd: parser.START,
},
},
},
{
input: `foo @ end()`,
expected: &parser.StepInvariantExpr{
Expr: &parser.VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "foo"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 11,
},
Timestamp: makeInt64Pointer(timestamp.FromTime(endTime)),
StartOrEnd: parser.END,
},
},
},
{
input: `test[5y] @ start()`,
expected: &parser.StepInvariantExpr{
Expr: &parser.MatrixSelector{
VectorSelector: &parser.VectorSelector{
Name: "test",
Timestamp: makeInt64Pointer(timestamp.FromTime(startTime)),
StartOrEnd: parser.START,
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "test"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 4,
},
},
Range: 5 * 365 * 24 * time.Hour,
EndPos: 18,
},
},
},
{
input: `test[5y] @ end()`,
expected: &parser.StepInvariantExpr{
Expr: &parser.MatrixSelector{
VectorSelector: &parser.VectorSelector{
Name: "test",
Timestamp: makeInt64Pointer(timestamp.FromTime(endTime)),
StartOrEnd: parser.END,
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "test"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 4,
},
},
Range: 5 * 365 * 24 * time.Hour,
EndPos: 16,
},
},
},
{
input: `some_metric[10m:5s] @ start()`,
expected: &parser.StepInvariantExpr{
Expr: &parser.SubqueryExpr{
Expr: &parser.VectorSelector{
Name: "some_metric",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 11,
},
},
Timestamp: makeInt64Pointer(timestamp.FromTime(startTime)),
StartOrEnd: parser.START,
Range: 10 * time.Minute,
Step: 5 * time.Second,
EndPos: 29,
},
},
},
{
input: `some_metric[10m:5s] @ end()`,
expected: &parser.StepInvariantExpr{
Expr: &parser.SubqueryExpr{
Expr: &parser.VectorSelector{
Name: "some_metric",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric"),
},
PosRange: posrange.PositionRange{
Start: 0,
End: 11,
},
},
Timestamp: makeInt64Pointer(timestamp.FromTime(endTime)),
StartOrEnd: parser.END,
Range: 10 * time.Minute,
Step: 5 * time.Second,
EndPos: 27,
},
},
},
{
input: `floor(some_metric / (3 * 1024))`,
outputTest: true,
expected: &parser.Call{
Func: &parser.Function{
Name: "floor",
ArgTypes: []parser.ValueType{parser.ValueTypeVector},
ReturnType: parser.ValueTypeVector,
},
Args: parser.Expressions{
&parser.BinaryExpr{
Op: parser.DIV,
LHS: &parser.VectorSelector{
Name: "some_metric",
LabelMatchers: []*labels.Matcher{
parser.MustLabelMatcher(labels.MatchEqual, "__name__", "some_metric"),
},
PosRange: posrange.PositionRange{
Start: 6,
End: 17,
},
},
RHS: &parser.StepInvariantExpr{
Expr: &parser.ParenExpr{
Expr: &parser.BinaryExpr{
Op: parser.MUL,
LHS: &parser.NumberLiteral{
Val: 3,
PosRange: posrange.PositionRange{
Start: 21,
End: 22,
},
},
RHS: &parser.NumberLiteral{
Val: 1024,
PosRange: posrange.PositionRange{
Start: 25,
End: 29,
},
},
},
PosRange: posrange.PositionRange{
Start: 20,
End: 30,
},
},
},
},
},
PosRange: posrange.PositionRange{
Start: 0,
End: 31,
},
},
},
}
for _, test := range testCases {
t.Run(test.input, func(t *testing.T) {
expr, err := parser.ParseExpr(test.input)
require.NoError(t, err)
expr = promql.PreprocessExpr(expr, startTime, endTime)
if test.outputTest {
require.Equal(t, test.input, expr.String(), "error on input '%s'", test.input)
}
require.Equal(t, test.expected, expr, "error on input '%s'", test.input)
})
}
}
func TestEngineOptsValidation(t *testing.T) {
cases := []struct {
opts promql.EngineOpts
query string
fail bool
expError error
}{
{
opts: promql.EngineOpts{EnableAtModifier: false},
query: "metric @ 100", fail: true, expError: promql.ErrValidationAtModifierDisabled,
}, {
opts: promql.EngineOpts{EnableAtModifier: false},
query: "rate(metric[1m] @ 100)", fail: true, expError: promql.ErrValidationAtModifierDisabled,
}, {
opts: promql.EngineOpts{EnableAtModifier: false},
query: "rate(metric[1h:1m] @ 100)", fail: true, expError: promql.ErrValidationAtModifierDisabled,
}, {
opts: promql.EngineOpts{EnableAtModifier: false},
query: "metric @ start()", fail: true, expError: promql.ErrValidationAtModifierDisabled,
}, {
opts: promql.EngineOpts{EnableAtModifier: false},
query: "rate(metric[1m] @ start())", fail: true, expError: promql.ErrValidationAtModifierDisabled,
}, {
opts: promql.EngineOpts{EnableAtModifier: false},
query: "rate(metric[1h:1m] @ start())", fail: true, expError: promql.ErrValidationAtModifierDisabled,
}, {
opts: promql.EngineOpts{EnableAtModifier: false},
query: "metric @ end()", fail: true, expError: promql.ErrValidationAtModifierDisabled,
}, {
opts: promql.EngineOpts{EnableAtModifier: false},
query: "rate(metric[1m] @ end())", fail: true, expError: promql.ErrValidationAtModifierDisabled,
}, {
opts: promql.EngineOpts{EnableAtModifier: false},
query: "rate(metric[1h:1m] @ end())", fail: true, expError: promql.ErrValidationAtModifierDisabled,
}, {
opts: promql.EngineOpts{EnableAtModifier: true},
query: "metric @ 100",
}, {
opts: promql.EngineOpts{EnableAtModifier: true},
query: "rate(metric[1m] @ start())",
}, {
opts: promql.EngineOpts{EnableAtModifier: true},
query: "rate(metric[1h:1m] @ end())",
}, {
opts: promql.EngineOpts{EnableNegativeOffset: false},
query: "metric offset -1s", fail: true, expError: promql.ErrValidationNegativeOffsetDisabled,
}, {
opts: promql.EngineOpts{EnableNegativeOffset: true},
query: "metric offset -1s",
}, {
opts: promql.EngineOpts{EnableAtModifier: true, EnableNegativeOffset: true},
query: "metric @ 100 offset -2m",
}, {
opts: promql.EngineOpts{EnableAtModifier: true, EnableNegativeOffset: true},
query: "metric offset -2m @ 100",
},
}
for _, c := range cases {
eng := promqltest.NewTestEngineWithOpts(t, c.opts)
_, err1 := eng.NewInstantQuery(context.Background(), nil, nil, c.query, time.Unix(10, 0))
_, err2 := eng.NewRangeQuery(context.Background(), nil, nil, c.query, time.Unix(0, 0), time.Unix(10, 0), time.Second)
if c.fail {
require.Equal(t, c.expError, err1)
require.Equal(t, c.expError, err2)
} else {
require.NoError(t, err1)
require.NoError(t, err2)
}
}
}
func TestEngine_Close(t *testing.T) {
t.Run("nil engine", func(t *testing.T) {
var ng *promql.Engine
require.NoError(t, ng.Close())
})
t.Run("non-nil engine", func(t *testing.T) {
ng := promql.NewEngine(promql.EngineOpts{
Logger: nil,
Reg: nil,
MaxSamples: 0,
Timeout: 100 * time.Second,
NoStepSubqueryIntervalFn: nil,
EnableAtModifier: true,
EnableNegativeOffset: true,
EnablePerStepStats: false,
LookbackDelta: 0,
EnableDelayedNameRemoval: true,
})
require.NoError(t, ng.Close())
})
}
func TestInstantQueryWithRangeVectorSelector(t *testing.T) {
engine := newTestEngine(t)
baseT := timestamp.Time(0)
storage := promqltest.LoadedStorage(t, `
load 1m
some_metric{env="1"} 0+1x4
some_metric{env="2"} 0+2x4
some_metric_with_stale_marker 0 1 stale 3
`)
t.Cleanup(func() { require.NoError(t, storage.Close()) })
testCases := map[string]struct {
expr string
expected promql.Matrix
ts time.Time
}{
"matches series with points in range": {
expr: "some_metric[2m]",
ts: baseT.Add(2 * time.Minute),
expected: promql.Matrix{
{
Metric: labels.FromStrings("__name__", "some_metric", "env", "1"),
Floats: []promql.FPoint{
{T: timestamp.FromTime(baseT.Add(time.Minute)), F: 1},
{T: timestamp.FromTime(baseT.Add(2 * time.Minute)), F: 2},
},
},
{
Metric: labels.FromStrings("__name__", "some_metric", "env", "2"),
Floats: []promql.FPoint{
{T: timestamp.FromTime(baseT.Add(time.Minute)), F: 2},
{T: timestamp.FromTime(baseT.Add(2 * time.Minute)), F: 4},
},
},
},
},
"matches no series": {
expr: "some_nonexistent_metric[1m]",
ts: baseT,
expected: promql.Matrix{},
},
"no samples in range": {
expr: "some_metric[1m]",
ts: baseT.Add(20 * time.Minute),
expected: promql.Matrix{},
},
"metric with stale marker": {
expr: "some_metric_with_stale_marker[3m]",
ts: baseT.Add(3 * time.Minute),
expected: promql.Matrix{
{
Metric: labels.FromStrings("__name__", "some_metric_with_stale_marker"),
Floats: []promql.FPoint{
{T: timestamp.FromTime(baseT.Add(time.Minute)), F: 1},
{T: timestamp.FromTime(baseT.Add(3 * time.Minute)), F: 3},
},
},
},
},
}
for name, testCase := range testCases {
t.Run(name, func(t *testing.T) {
q, err := engine.NewInstantQuery(context.Background(), storage, nil, testCase.expr, testCase.ts)
require.NoError(t, err)
defer q.Close()
res := q.Exec(context.Background())
require.NoError(t, res.Err)
testutil.RequireEqual(t, testCase.expected, res.Value)
})
}
}
func TestQueryLookbackDelta(t *testing.T) {
var (
load = `load 5m
metric 0 1 2
`
query = "metric"
lastDatapointTs = time.Unix(600, 0)
)
cases := []struct {
name string
ts time.Time
engineLookback, queryLookback time.Duration
expectSamples bool
}{
{
name: "default lookback delta",
ts: lastDatapointTs.Add(defaultLookbackDelta - time.Millisecond),
expectSamples: true,
},
{
name: "outside default lookback delta",
ts: lastDatapointTs.Add(defaultLookbackDelta),
expectSamples: false,
},
{
name: "custom engine lookback delta",
ts: lastDatapointTs.Add(10*time.Minute - time.Millisecond),
engineLookback: 10 * time.Minute,
expectSamples: true,
},
{
name: "outside custom engine lookback delta",
ts: lastDatapointTs.Add(10 * time.Minute),
engineLookback: 10 * time.Minute,
expectSamples: false,
},
{
name: "custom query lookback delta",
ts: lastDatapointTs.Add(20*time.Minute - time.Millisecond),
engineLookback: 10 * time.Minute,
queryLookback: 20 * time.Minute,
expectSamples: true,
},
{
name: "outside custom query lookback delta",
ts: lastDatapointTs.Add(20 * time.Minute),
engineLookback: 10 * time.Minute,
queryLookback: 20 * time.Minute,
expectSamples: false,
},
{
name: "negative custom query lookback delta",
ts: lastDatapointTs.Add(20*time.Minute - time.Millisecond),
engineLookback: -10 * time.Minute,
queryLookback: 20 * time.Minute,
expectSamples: true,
},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
engine := promqltest.NewTestEngine(t, false, c.engineLookback, promqltest.DefaultMaxSamplesPerQuery)
storage := promqltest.LoadedStorage(t, load)
t.Cleanup(func() { storage.Close() })
opts := promql.NewPrometheusQueryOpts(false, c.queryLookback)
qry, err := engine.NewInstantQuery(context.Background(), storage, opts, query, c.ts)
require.NoError(t, err)
res := qry.Exec(context.Background())
require.NoError(t, res.Err)
vec, ok := res.Value.(promql.Vector)
require.True(t, ok)
if c.expectSamples {
require.NotEmpty(t, vec)
} else {
require.Empty(t, vec)
}
})
}
}
func makeInt64Pointer(val int64) *int64 {
valp := new(int64)
*valp = val
return valp
}
func TestHistogramCopyFromIteratorRegression(t *testing.T) {
// Loading the following histograms creates two chunks because there's a
// counter reset. Not only the counter is lower in the last histogram
// but also there's missing buckets.
// This in turns means that chunk iterators will have different spans.
load := `load 1m
histogram {{sum:4 count:4 buckets:[2 2]}} {{sum:6 count:6 buckets:[3 3]}} {{sum:1 count:1 buckets:[1]}}
`
storage := promqltest.LoadedStorage(t, load)
t.Cleanup(func() { storage.Close() })
engine := promqltest.NewTestEngine(t, false, 0, promqltest.DefaultMaxSamplesPerQuery)
verify := func(t *testing.T, qry promql.Query, expected []histogram.FloatHistogram) {
res := qry.Exec(context.Background())
require.NoError(t, res.Err)
m, ok := res.Value.(promql.Matrix)
require.True(t, ok)
require.Len(t, m, 1)
series := m[0]
require.Empty(t, series.Floats)
require.Len(t, series.Histograms, len(expected))
for i, e := range expected {
series.Histograms[i].H.CounterResetHint = histogram.UnknownCounterReset // Don't care.
require.Equal(t, &e, series.Histograms[i].H)
}
}
qry, err := engine.NewRangeQuery(context.Background(), storage, nil, "increase(histogram[90s])", time.Unix(0, 0), time.Unix(0, 0).Add(60*time.Second), time.Minute)
require.NoError(t, err)
verify(t, qry, []histogram.FloatHistogram{
{
Count: 3,
Sum: 3, // Increase from 4 to 6 is 2. Interpolation adds 1.
PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, // Two buckets changed between the first and second histogram.
PositiveBuckets: []float64{1.5, 1.5}, // Increase from 2 to 3 is 1 in both buckets. Interpolation adds 0.5.
},
})
qry, err = engine.NewInstantQuery(context.Background(), storage, nil, "histogram[61s]", time.Unix(0, 0).Add(2*time.Minute))
require.NoError(t, err)
verify(t, qry, []histogram.FloatHistogram{
{
Count: 6,
Sum: 6,
PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}},
PositiveBuckets: []float64{3, 3},
},
{
Count: 1,
Sum: 1,
PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}},
PositiveBuckets: []float64{1},
},
})
}
func TestRateAnnotations(t *testing.T) {
testCases := map[string]struct {
data string
expr string
expectedWarningAnnotations []string
expectedInfoAnnotations []string
}{
"info annotation when two samples are selected": {
data: `
series 1 2
`,
expr: "rate(series[1m1s])",
expectedWarningAnnotations: []string{},
expectedInfoAnnotations: []string{
`PromQL info: metric might not be a counter, name does not end in _total/_sum/_count/_bucket: "series" (1:6)`,
},
},
"no info annotations when no samples": {
data: `
series
`,
expr: "rate(series[1m1s])",
expectedWarningAnnotations: []string{},
expectedInfoAnnotations: []string{},
},
"no info annotations when selecting one sample": {
data: `
series 1 2
`,
expr: "rate(series[10s])",
expectedWarningAnnotations: []string{},
expectedInfoAnnotations: []string{},
},
"no info annotations when no samples due to mixed data types": {
data: `
series{label="a"} 1 {{schema:1 sum:15 count:10 buckets:[1 2 3]}}
`,
expr: "rate(series[1m1s])",
expectedWarningAnnotations: []string{
`PromQL warning: encountered a mix of histograms and floats for metric name "series" (1:6)`,
},
expectedInfoAnnotations: []string{},
},
"no info annotations when selecting two native histograms": {
data: `
series{label="a"} {{schema:1 sum:10 count:5 buckets:[1 2 3]}} {{schema:1 sum:15 count:10 buckets:[1 2 3]}}
`,
expr: "rate(series[1m1s])",
expectedWarningAnnotations: []string{},
expectedInfoAnnotations: []string{},
},
}
for name, testCase := range testCases {
t.Run(name, func(t *testing.T) {
store := promqltest.LoadedStorage(t, "load 1m\n"+strings.TrimSpace(testCase.data))
t.Cleanup(func() { _ = store.Close() })
engine := newTestEngine(t)
query, err := engine.NewInstantQuery(context.Background(), store, nil, testCase.expr, timestamp.Time(0).Add(1*time.Minute))
require.NoError(t, err)
t.Cleanup(query.Close)
res := query.Exec(context.Background())
require.NoError(t, res.Err)
warnings, infos := res.Warnings.AsStrings(testCase.expr, 0, 0)
testutil.RequireEqual(t, testCase.expectedWarningAnnotations, warnings)
testutil.RequireEqual(t, testCase.expectedInfoAnnotations, infos)
})
}
}
func TestHistogramRateWithFloatStaleness(t *testing.T) {
// Make a chunk with two normal histograms of the same value.
h1 := histogram.Histogram{
Schema: 2,
Count: 10,
Sum: 100,
PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}},
PositiveBuckets: []int64{100},
}
c1 := chunkenc.NewHistogramChunk()
app, err := c1.Appender()
require.NoError(t, err)
var (
newc chunkenc.Chunk
recoded bool
)
newc, recoded, app, err = app.AppendHistogram(nil, 0, h1.Copy(), false)
require.NoError(t, err)
require.False(t, recoded)
require.Nil(t, newc)
newc, recoded, _, err = app.AppendHistogram(nil, 10, h1.Copy(), false)
require.NoError(t, err)
require.False(t, recoded)
require.Nil(t, newc)
// Make a chunk with a single float stale marker.
c2 := chunkenc.NewXORChunk()
app, err = c2.Appender()
require.NoError(t, err)
app.Append(20, math.Float64frombits(value.StaleNaN))
// Make a chunk with two normal histograms that have zero value.
h2 := histogram.Histogram{
Schema: 2,
}
c3 := chunkenc.NewHistogramChunk()
app, err = c3.Appender()
require.NoError(t, err)
newc, recoded, app, err = app.AppendHistogram(nil, 30, h2.Copy(), false)
require.NoError(t, err)
require.False(t, recoded)
require.Nil(t, newc)
newc, recoded, _, err = app.AppendHistogram(nil, 40, h2.Copy(), false)
require.NoError(t, err)
require.False(t, recoded)
require.Nil(t, newc)
querier := storage.MockQuerier{
SelectMockFunction: func(_ bool, _ *storage.SelectHints, _ ...*labels.Matcher) storage.SeriesSet {
return &singleSeriesSet{
series: mockSeries{chunks: []chunkenc.Chunk{c1, c2, c3}, labelSet: []string{"__name__", "foo"}},
}
},
}
queriable := storage.MockQueryable{MockQuerier: &querier}
engine := promqltest.NewTestEngine(t, false, 0, promqltest.DefaultMaxSamplesPerQuery)
q, err := engine.NewInstantQuery(context.Background(), &queriable, nil, "rate(foo[40s])", timestamp.Time(45))
require.NoError(t, err)
defer q.Close()
res := q.Exec(context.Background())
require.NoError(t, res.Err)
vec, err := res.Vector()
require.NoError(t, err)
// Single sample result.
require.Len(t, vec, 1)
// The result is a histogram.
require.NotNil(t, vec[0].H)
// The result should be zero as the histogram has not increased, so the rate is zero.
require.Equal(t, 0.0, vec[0].H.Count)
require.Equal(t, 0.0, vec[0].H.Sum)
}
type singleSeriesSet struct {
series storage.Series
consumed bool
}
func (s *singleSeriesSet) Next() bool { c := s.consumed; s.consumed = true; return !c }
func (s singleSeriesSet) At() storage.Series { return s.series }
func (s singleSeriesSet) Err() error { return nil }
func (s singleSeriesSet) Warnings() annotations.Annotations { return nil }
type mockSeries struct {
chunks []chunkenc.Chunk
labelSet []string
}
func (s mockSeries) Labels() labels.Labels {
return labels.FromStrings(s.labelSet...)
}
func (s mockSeries) Iterator(it chunkenc.Iterator) chunkenc.Iterator {
iterables := []chunkenc.Iterator{}
for _, c := range s.chunks {
iterables = append(iterables, c.Iterator(nil))
}
return storage.ChainSampleIteratorFromIterators(it, iterables)
}
func TestEvaluationWithDelayedNameRemovalDisabled(t *testing.T) {
opts := promql.EngineOpts{
Logger: nil,
Reg: nil,
EnableAtModifier: true,
MaxSamples: 10000,
Timeout: 10 * time.Second,
EnableDelayedNameRemoval: false,
}
engine := promqltest.NewTestEngineWithOpts(t, opts)
promqltest.RunTest(t, `
load 5m
metric_total{env="1"} 0 60 120
another_metric{env="1"} 60 120 180
# Does not drop __name__ for vector selector
eval instant at 10m metric_total{env="1"}
metric_total{env="1"} 120
# Drops __name__ for unary operators
eval instant at 10m -metric_total
{env="1"} -120
# Drops __name__ for binary operators
eval instant at 10m metric_total + another_metric
{env="1"} 300
# Does not drop __name__ for binary comparison operators
eval instant at 10m metric_total <= another_metric
metric_total{env="1"} 120
# Drops __name__ for binary comparison operators with "bool" modifier
eval instant at 10m metric_total <= bool another_metric
{env="1"} 1
# Drops __name__ for vector-scalar operations
eval instant at 10m metric_total * 2
{env="1"} 240
# Drops __name__ for instant-vector functions
eval instant at 10m clamp(metric_total, 0, 100)
{env="1"} 100
# Drops __name__ for round function
eval instant at 10m round(metric_total)
{env="1"} 120
# Drops __name__ for range-vector functions
eval instant at 10m rate(metric_total{env="1"}[10m])
{env="1"} 0.2
# Does not drop __name__ for last_over_time function
eval instant at 10m last_over_time(metric_total{env="1"}[10m])
metric_total{env="1"} 120
# Drops name for other _over_time functions
eval instant at 10m max_over_time(metric_total{env="1"}[10m])
{env="1"} 120
`, engine)
}