From 0cc99e677ad3da2cf00599cb0e6c272ab58688f1 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 7 May 2024 18:14:22 +0200 Subject: [PATCH 01/83] promql.Engine: Add Close method Signed-off-by: Arve Knudsen --- cmd/prometheus/main.go | 9 +++++ promql/bench_test.go | 8 ++++ promql/engine.go | 11 ++++++ promql/engine_test.go | 71 ++++++++++++++++++++++------------ promql/functions_test.go | 3 ++ promql/promql_test.go | 9 +++-- promql/promqltest/test.go | 14 +++++-- promql/promqltest/test_test.go | 2 +- rules/alerting_test.go | 70 +++++++++++++++++++++------------ rules/manager_test.go | 36 ++++++++++++++--- rules/recording_test.go | 12 ++++-- web/api/v1/api_test.go | 48 ++++++++++++++--------- web/api/v1/errors_test.go | 9 ++++- 13 files changed, 218 insertions(+), 84 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index f2988b2f2d..dfc499c63f 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -955,12 +955,18 @@ func main() { listener, err := webHandler.Listener() if err != nil { level.Error(logger).Log("msg", "Unable to start web listener", "err", err) + if err := queryEngine.Close(); err != nil { + level.Warn(logger).Log("msg", "Closing query engine failed", "err", err) + } os.Exit(1) } err = toolkit_web.Validate(*webConfig) if err != nil { level.Error(logger).Log("msg", "Unable to validate web configuration file", "err", err) + if err := queryEngine.Close(); err != nil { + level.Warn(logger).Log("msg", "Closing query engine failed", "err", err) + } os.Exit(1) } @@ -982,6 +988,9 @@ func main() { case <-cancel: reloadReady.Close() } + if err := queryEngine.Close(); err != nil { + level.Warn(logger).Log("msg", "Closing query engine failed", "err", err) + } return nil }, func(err error) { diff --git a/promql/bench_test.go b/promql/bench_test.go index 9a85290915..eca721d565 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -21,6 +21,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" @@ -257,6 +259,9 @@ func BenchmarkRangeQuery(b *testing.B) { Timeout: 100 * time.Second, } engine := promql.NewEngine(opts) + b.Cleanup(func() { + require.NoError(b, engine.Close()) + }) const interval = 10000 // 10s interval. // A day of data plus 10k steps. @@ -340,6 +345,9 @@ func BenchmarkNativeHistograms(b *testing.B) { for _, tc := range cases { b.Run(tc.name, func(b *testing.B) { ng := promql.NewEngine(opts) + b.Cleanup(func() { + require.NoError(b, ng.Close()) + }) for i := 0; i < b.N; i++ { qry, err := ng.NewRangeQuery(context.Background(), testStorage, nil, tc.query, start, end, step) if err != nil { diff --git a/promql/engine.go b/promql/engine.go index ea4bc1af85..8825be20ca 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -19,6 +19,7 @@ import ( "context" "errors" "fmt" + "io" "math" "reflect" "runtime" @@ -271,6 +272,8 @@ func contextErr(err error, env string) error { // // 2) Enforcement of the maximum number of concurrent queries. type QueryTracker interface { + io.Closer + // GetMaxConcurrent returns maximum number of concurrent queries that are allowed by this tracker. GetMaxConcurrent() int @@ -423,6 +426,14 @@ func NewEngine(opts EngineOpts) *Engine { } } +// Close closes ng. +func (ng *Engine) Close() error { + if ng.activeQueryTracker != nil { + return ng.activeQueryTracker.Close() + } + return nil +} + // SetQueryLogger sets the query logger. func (ng *Engine) SetQueryLogger(l QueryLogger) { ng.queryLoggerLock.Lock() diff --git a/promql/engine_test.go b/promql/engine_test.go index b7435d4738..8059993699 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -18,7 +18,6 @@ import ( "errors" "fmt" "math" - "os" "sort" "strconv" "sync" @@ -55,14 +54,7 @@ func TestMain(m *testing.M) { func TestQueryConcurrency(t *testing.T) { maxConcurrency := 10 - dir, err := os.MkdirTemp("", "test_concurrency") - require.NoError(t, err) - defer os.RemoveAll(dir) - queryTracker := promql.NewActiveQueryTracker(dir, maxConcurrency, nil) - t.Cleanup(func() { - require.NoError(t, queryTracker.Close()) - }) - + queryTracker := promql.NewActiveQueryTracker(t.TempDir(), maxConcurrency, nil) opts := promql.EngineOpts{ Logger: nil, Reg: nil, @@ -72,13 +64,18 @@ func TestQueryConcurrency(t *testing.T) { } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) ctx, cancelCtx := context.WithCancel(context.Background()) - defer cancelCtx() + t.Cleanup(cancelCtx) block := make(chan struct{}) processing := make(chan struct{}) done := make(chan int) - defer close(done) + t.Cleanup(func() { + close(done) + }) f := func(context.Context) error { select { @@ -164,6 +161,9 @@ func TestQueryTimeout(t *testing.T) { Timeout: 5 * time.Millisecond, } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() @@ -263,6 +263,9 @@ func TestQueryError(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) errStorage := promql.ErrStorage{errors.New("storage error")} queryable := storage.QueryableFunc(func(mint, maxt int64) (storage.Querier, error) { return &errQuerier{err: errStorage}, nil @@ -597,6 +600,9 @@ func TestSelectHintsSetCorrectly(t *testing.T) { } { t.Run(tc.query, func(t *testing.T) { engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) hintsRecorder := &noopHintRecordingQueryable{} var ( @@ -628,6 +634,9 @@ func TestEngineShutdown(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) ctx, cancelCtx := context.WithCancel(context.Background()) block := make(chan struct{}) @@ -763,7 +772,7 @@ load 10s t.Run(fmt.Sprintf("%d query=%s", i, c.Query), func(t *testing.T) { var err error var qry promql.Query - engine := newTestEngine() + engine := newTestEngine(t) if c.Interval == 0 { qry, err = engine.NewInstantQuery(context.Background(), storage, nil, c.Query, c.Start) } else { @@ -1305,7 +1314,7 @@ load 10s for _, c := range cases { t.Run(c.Query, func(t *testing.T) { opts := promql.NewPrometheusQueryOpts(true, 0) - engine := promqltest.NewTestEngine(true, 0, promqltest.DefaultMaxSamplesPerQuery) + engine := promqltest.NewTestEngine(t, true, 0, promqltest.DefaultMaxSamplesPerQuery) runQuery := func(expErr error) *stats.Statistics { var err error @@ -1332,7 +1341,7 @@ load 10s if c.SkipMaxCheck { return } - engine = promqltest.NewTestEngine(true, 0, stats.Samples.PeakSamples-1) + engine = promqltest.NewTestEngine(t, true, 0, stats.Samples.PeakSamples-1) runQuery(promql.ErrTooManySamples(env)) }) } @@ -1485,7 +1494,7 @@ load 10s for _, c := range cases { t.Run(c.Query, func(t *testing.T) { - engine := newTestEngine() + engine := newTestEngine(t) testFunc := func(expError error) { var err error var qry promql.Query @@ -1506,18 +1515,18 @@ load 10s } // Within limit. - engine = promqltest.NewTestEngine(false, 0, c.MaxSamples) + engine = promqltest.NewTestEngine(t, false, 0, c.MaxSamples) testFunc(nil) // Exceeding limit. - engine = promqltest.NewTestEngine(false, 0, c.MaxSamples-1) + engine = promqltest.NewTestEngine(t, false, 0, c.MaxSamples-1) testFunc(promql.ErrTooManySamples(env)) }) } } func TestAtModifier(t *testing.T) { - engine := newTestEngine() + engine := newTestEngine(t) storage := promqltest.LoadedStorage(t, ` load 10s metric{job="1"} 0+1x1000 @@ -1995,7 +2004,7 @@ func TestSubquerySelector(t *testing.T) { }, } { t.Run("", func(t *testing.T) { - engine := newTestEngine() + engine := newTestEngine(t) storage := promqltest.LoadedStorage(t, tst.loadString) t.Cleanup(func() { storage.Close() }) @@ -2016,7 +2025,7 @@ func TestSubquerySelector(t *testing.T) { } func TestTimestampFunction_StepsMoreOftenThanSamples(t *testing.T) { - engine := newTestEngine() + engine := newTestEngine(t) storage := promqltest.LoadedStorage(t, ` load 1m metric 0+1x1000 @@ -2086,6 +2095,9 @@ func TestQueryLogger_basic(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) queryExec := func() { ctx, cancelCtx := context.WithCancel(context.Background()) @@ -2137,6 +2149,9 @@ func TestQueryLogger_fields(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) f1 := NewFakeQueryLogger() engine.SetQueryLogger(f1) @@ -2166,6 +2181,9 @@ func TestQueryLogger_error(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) f1 := NewFakeQueryLogger() engine.SetQueryLogger(f1) @@ -3049,6 +3067,9 @@ func TestEngineOptsValidation(t *testing.T) { for _, c := range cases { eng := promql.NewEngine(c.opts) + t.Cleanup(func() { + require.NoError(t, eng.Close()) + }) _, err1 := eng.NewInstantQuery(context.Background(), nil, nil, c.query, time.Unix(10, 0)) _, err2 := eng.NewRangeQuery(context.Background(), nil, nil, c.query, time.Unix(0, 0), time.Unix(10, 0), time.Second) if c.fail { @@ -3208,7 +3229,7 @@ func TestRangeQuery(t *testing.T) { } for _, c := range cases { t.Run(c.Name, func(t *testing.T) { - engine := newTestEngine() + engine := newTestEngine(t) storage := promqltest.LoadedStorage(t, c.Load) t.Cleanup(func() { storage.Close() }) @@ -3342,7 +3363,7 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { seriesName := "sparse_histogram_series" seriesNameOverTime := "sparse_histogram_series_over_time" - engine := newTestEngine() + engine := newTestEngine(t) ts := idx0 * int64(10*time.Minute/time.Millisecond) app := storage.Appender(context.Background()) @@ -3612,7 +3633,7 @@ func TestNativeHistogram_SubOperator(t *testing.T) { for _, c := range cases { for _, floatHisto := range []bool{true, false} { t.Run(fmt.Sprintf("floatHistogram=%t %d", floatHisto, idx0), func(t *testing.T) { - engine := newTestEngine() + engine := newTestEngine(t) storage := teststorage.New(t) t.Cleanup(func() { storage.Close() }) @@ -3773,7 +3794,7 @@ func TestNativeHistogram_MulDivOperator(t *testing.T) { seriesName := "sparse_histogram_series" floatSeriesName := "float_series" - engine := newTestEngine() + engine := newTestEngine(t) ts := idx0 * int64(10*time.Minute/time.Millisecond) app := storage.Appender(context.Background()) @@ -3896,7 +3917,7 @@ metric 0 1 2 for _, c := range cases { c := c t.Run(c.name, func(t *testing.T) { - engine := promqltest.NewTestEngine(false, c.engineLookback, promqltest.DefaultMaxSamplesPerQuery) + engine := promqltest.NewTestEngine(t, false, c.engineLookback, promqltest.DefaultMaxSamplesPerQuery) storage := promqltest.LoadedStorage(t, load) t.Cleanup(func() { storage.Close() }) diff --git a/promql/functions_test.go b/promql/functions_test.go index aef59c8379..ce62d316b5 100644 --- a/promql/functions_test.go +++ b/promql/functions_test.go @@ -40,6 +40,9 @@ func TestDeriv(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) a := storage.Appender(context.Background()) diff --git a/promql/promql_test.go b/promql/promql_test.go index 7bafc02e3b..0106a1e10b 100644 --- a/promql/promql_test.go +++ b/promql/promql_test.go @@ -27,12 +27,12 @@ import ( "github.com/prometheus/prometheus/util/teststorage" ) -func newTestEngine() *promql.Engine { - return promqltest.NewTestEngine(false, 0, promqltest.DefaultMaxSamplesPerQuery) +func newTestEngine(t *testing.T) *promql.Engine { + return promqltest.NewTestEngine(t, false, 0, promqltest.DefaultMaxSamplesPerQuery) } func TestEvaluations(t *testing.T) { - promqltest.RunBuiltinTests(t, newTestEngine()) + promqltest.RunBuiltinTests(t, newTestEngine(t)) } // Run a lot of queries at the same time, to check for race conditions. @@ -46,6 +46,9 @@ func TestConcurrentRangeQueries(t *testing.T) { Timeout: 100 * time.Second, } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) const interval = 10000 // 10s interval. // A day of data plus 10k steps. diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 1affd91f63..29908acd9a 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -72,8 +72,8 @@ func LoadedStorage(t testutil.T, input string) *teststorage.TestStorage { return test.storage } -func NewTestEngine(enablePerStepStats bool, lookbackDelta time.Duration, maxSamples int) *promql.Engine { - return promql.NewEngine(promql.EngineOpts{ +func NewTestEngine(t *testing.T, enablePerStepStats bool, lookbackDelta time.Duration, maxSamples int) *promql.Engine { + ng := promql.NewEngine(promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: maxSamples, @@ -84,6 +84,10 @@ func NewTestEngine(enablePerStepStats bool, lookbackDelta time.Duration, maxSamp EnablePerStepStats: enablePerStepStats, LookbackDelta: lookbackDelta, }) + t.Cleanup(func() { + require.NoError(t, ng.Close()) + }) + return ng } // RunBuiltinTests runs an acceptance test suite against the provided engine. @@ -1073,7 +1077,11 @@ func (ll *LazyLoader) Storage() storage.Storage { // Close closes resources associated with the LazyLoader. func (ll *LazyLoader) Close() error { ll.cancelCtx() - return ll.storage.Close() + err := ll.queryEngine.Close() + if sErr := ll.storage.Close(); sErr != nil { + return errors.Join(sErr, err) + } + return err } func makeInt64Pointer(val int64) *int64 { diff --git a/promql/promqltest/test_test.go b/promql/promqltest/test_test.go index f6fe387071..4fd3c40007 100644 --- a/promql/promqltest/test_test.go +++ b/promql/promqltest/test_test.go @@ -451,7 +451,7 @@ eval range from 0 to 5m step 5m testmetric for name, testCase := range testCases { t.Run(name, func(t *testing.T) { - err := runTest(t, testCase.input, NewTestEngine(false, 0, DefaultMaxSamplesPerQuery)) + err := runTest(t, testCase.input, NewTestEngine(t, false, 0, DefaultMaxSamplesPerQuery)) if testCase.expectedError == "" { require.NoError(t, err) diff --git a/rules/alerting_test.go b/rules/alerting_test.go index a9315b47ee..ee19d2f255 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -36,16 +36,23 @@ import ( "github.com/prometheus/prometheus/util/testutil" ) -var testEngine = promql.NewEngine(promql.EngineOpts{ - Logger: nil, - Reg: nil, - MaxSamples: 10000, - Timeout: 100 * time.Second, - NoStepSubqueryIntervalFn: func(int64) int64 { return 60 * 1000 }, - EnableAtModifier: true, - EnableNegativeOffset: true, - EnablePerStepStats: true, -}) +func testEngine(tb testing.TB) *promql.Engine { + tb.Helper() + e := promql.NewEngine(promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 10000, + Timeout: 100 * time.Second, + NoStepSubqueryIntervalFn: func(int64) int64 { return 60 * 1000 }, + EnableAtModifier: true, + EnableNegativeOffset: true, + EnablePerStepStats: true, + }) + tb.Cleanup(func() { + require.NoError(tb, e.Close()) + }) + return e +} func TestAlertingRuleState(t *testing.T) { tests := []struct { @@ -225,12 +232,14 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) { }, } + ng := testEngine(t) + baseTime := time.Unix(0, 0) for i, result := range results { t.Logf("case %d", i) evalTime := baseTime.Add(time.Duration(i) * time.Minute) result[0].T = timestamp.FromTime(evalTime) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. @@ -247,7 +256,7 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) { testutil.RequireEqual(t, result, filteredRes) } evalTime := baseTime.Add(time.Duration(len(results)) * time.Minute) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, 0) require.NoError(t, err) require.Empty(t, res) } @@ -309,13 +318,15 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { }, } + ng := testEngine(t) + evalTime := time.Unix(0, 0) result[0].T = timestamp.FromTime(evalTime) result[1].T = timestamp.FromTime(evalTime) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. res, err := ruleWithoutExternalLabels.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -329,7 +340,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { } res, err = ruleWithExternalLabels.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -406,9 +417,11 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { result[0].T = timestamp.FromTime(evalTime) result[1].T = timestamp.FromTime(evalTime) + ng := testEngine(t) + var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. res, err := ruleWithoutExternalURL.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -422,7 +435,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { } res, err = ruleWithExternalURL.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -475,9 +488,11 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { evalTime := time.Unix(0, 0) result[0].T = timestamp.FromTime(evalTime) + ng := testEngine(t) + var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. res, err := rule.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -520,6 +535,8 @@ instance: {{ $v.Labels.instance }}, value: {{ printf "%.0f" $v.Value }}; ) evalTime := time.Unix(0, 0) + ng := testEngine(t) + startQueryCh := make(chan struct{}) getDoneCh := make(chan struct{}) slowQueryFunc := func(ctx context.Context, q string, ts time.Time) (promql.Vector, error) { @@ -533,7 +550,7 @@ instance: {{ $v.Labels.instance }}, value: {{ printf "%.0f" $v.Value }}; require.Fail(t, "unexpected blocking when template expanding.") } } - return EngineQueryFunc(testEngine, storage)(ctx, q, ts) + return EngineQueryFunc(ng, storage)(ctx, q, ts) } go func() { <-startQueryCh @@ -579,6 +596,9 @@ func TestAlertingRuleDuplicate(t *testing.T) { } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() @@ -642,9 +662,9 @@ func TestAlertingRuleLimit(t *testing.T) { ) evalTime := time.Unix(0, 0) - + ng := testEngine(t) for _, test := range tests { - switch _, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); { + switch _, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, test.limit); { case err != nil: require.EqualError(t, err, test.err) case test.err != "": @@ -866,12 +886,13 @@ func TestKeepFiringFor(t *testing.T) { }, } + ng := testEngine(t) baseTime := time.Unix(0, 0) for i, result := range results { t.Logf("case %d", i) evalTime := baseTime.Add(time.Duration(i) * time.Minute) result[0].T = timestamp.FromTime(evalTime) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. @@ -888,7 +909,7 @@ func TestKeepFiringFor(t *testing.T) { testutil.RequireEqual(t, result, filteredRes) } evalTime := baseTime.Add(time.Duration(len(results)) * time.Minute) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, 0) require.NoError(t, err) require.Empty(t, res) } @@ -923,9 +944,10 @@ func TestPendingAndKeepFiringFor(t *testing.T) { F: 1, } + ng := testEngine(t) baseTime := time.Unix(0, 0) result.T = timestamp.FromTime(baseTime) - res, err := rule.Eval(context.TODO(), baseTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), baseTime, EngineQueryFunc(ng, storage), nil, 0) require.NoError(t, err) require.Len(t, res, 2) @@ -940,7 +962,7 @@ func TestPendingAndKeepFiringFor(t *testing.T) { } evalTime := baseTime.Add(time.Minute) - res, err = rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err = rule.Eval(context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, 0) require.NoError(t, err) require.Empty(t, res) } diff --git a/rules/manager_test.go b/rules/manager_test.go index 2f7343ebb8..5ec142c372 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -157,12 +157,13 @@ func TestAlertingRule(t *testing.T) { }, } + ng := testEngine(t) for i, test := range tests { t.Logf("case %d", i) evalTime := baseTime.Add(test.time) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. @@ -296,6 +297,7 @@ func TestForStateAddSamples(t *testing.T) { }, } + ng := testEngine(t) var forState float64 for i, test := range tests { t.Logf("case %d", i) @@ -308,7 +310,7 @@ func TestForStateAddSamples(t *testing.T) { forState = float64(value.StaleNaN) } - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS' samples. @@ -359,8 +361,9 @@ func TestForStateRestore(t *testing.T) { expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) require.NoError(t, err) + ng := testEngine(t) opts := &ManagerOptions{ - QueryFunc: EngineQueryFunc(testEngine, storage), + QueryFunc: EngineQueryFunc(ng, storage), Appendable: storage, Queryable: storage, Context: context.Background(), @@ -528,6 +531,9 @@ func TestStaleness(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(engineOpts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) opts := &ManagerOptions{ QueryFunc: EngineQueryFunc(engine, st), Appendable: st, @@ -720,6 +726,9 @@ func TestUpdate(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) ruleManager := NewManager(&ManagerOptions{ Appendable: st, Queryable: st, @@ -858,6 +867,9 @@ func TestNotify(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(engineOpts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) var lastNotified []*Alert notifyFunc := func(ctx context.Context, expr string, alerts ...*Alert) { lastNotified = alerts @@ -933,6 +945,9 @@ func TestMetricsUpdate(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) ruleManager := NewManager(&ManagerOptions{ Appendable: storage, Queryable: storage, @@ -1007,6 +1022,9 @@ func TestGroupStalenessOnRemoval(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) ruleManager := NewManager(&ManagerOptions{ Appendable: storage, Queryable: storage, @@ -1084,6 +1102,9 @@ func TestMetricsStalenessOnManagerShutdown(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) ruleManager := NewManager(&ManagerOptions{ Appendable: storage, Queryable: storage, @@ -1186,6 +1207,9 @@ func TestRuleHealthUpdates(t *testing.T) { Timeout: 10 * time.Second, } engine := promql.NewEngine(engineOpts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) opts := &ManagerOptions{ QueryFunc: EngineQueryFunc(engine, st), Appendable: st, @@ -1282,9 +1306,10 @@ func TestRuleGroupEvalIterationFunc(t *testing.T) { }, } + ng := testEngine(t) testFunc := func(tst testInput) { opts := &ManagerOptions{ - QueryFunc: EngineQueryFunc(testEngine, storage), + QueryFunc: EngineQueryFunc(ng, storage), Appendable: storage, Queryable: storage, Context: context.Background(), @@ -1368,8 +1393,9 @@ func TestNativeHistogramsInRecordingRules(t *testing.T) { } require.NoError(t, app.Commit()) + ng := testEngine(t) opts := &ManagerOptions{ - QueryFunc: EngineQueryFunc(testEngine, storage), + QueryFunc: EngineQueryFunc(ng, storage), Appendable: storage, Queryable: storage, Context: context.Background(), diff --git a/rules/recording_test.go b/rules/recording_test.go index 49f37b1ac9..4c2288e6fd 100644 --- a/rules/recording_test.go +++ b/rules/recording_test.go @@ -123,10 +123,11 @@ func TestRuleEval(t *testing.T) { storage := setUpRuleEvalTest(t) t.Cleanup(func() { storage.Close() }) + ng := testEngine(t) for _, scenario := range ruleEvalTestScenarios { t.Run(scenario.name, func(t *testing.T) { rule := NewRecordingRule("test_rule", scenario.expr, scenario.ruleLabels) - result, err := rule.Eval(context.TODO(), ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0) + result, err := rule.Eval(context.TODO(), ruleEvaluationTime, EngineQueryFunc(ng, storage), nil, 0) require.NoError(t, err) testutil.RequireEqual(t, scenario.expected, result) }) @@ -137,6 +138,7 @@ func BenchmarkRuleEval(b *testing.B) { storage := setUpRuleEvalTest(b) b.Cleanup(func() { storage.Close() }) + ng := testEngine(b) for _, scenario := range ruleEvalTestScenarios { b.Run(scenario.name, func(b *testing.B) { rule := NewRecordingRule("test_rule", scenario.expr, scenario.ruleLabels) @@ -144,7 +146,7 @@ func BenchmarkRuleEval(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - _, err := rule.Eval(context.TODO(), ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0) + _, err := rule.Eval(context.TODO(), ruleEvaluationTime, EngineQueryFunc(ng, storage), nil, 0) if err != nil { require.NoError(b, err) } @@ -166,6 +168,9 @@ func TestRuleEvalDuplicate(t *testing.T) { } engine := promql.NewEngine(opts) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() @@ -212,10 +217,11 @@ func TestRecordingRuleLimit(t *testing.T) { labels.FromStrings("test", "test"), ) + ng := testEngine(t) evalTime := time.Unix(0, 0) for _, test := range tests { - switch _, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); { + switch _, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(ng, storage), nil, test.limit); { case err != nil: require.EqualError(t, err, test.err) case test.err != "": diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index b30890893b..3fe5a092f5 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -59,16 +59,25 @@ import ( "github.com/prometheus/prometheus/util/teststorage" ) -var testEngine = promql.NewEngine(promql.EngineOpts{ - Logger: nil, - Reg: nil, - MaxSamples: 10000, - Timeout: 100 * time.Second, - NoStepSubqueryIntervalFn: func(int64) int64 { return 60 * 1000 }, - EnableAtModifier: true, - EnableNegativeOffset: true, - EnablePerStepStats: true, -}) +func testEngine(t *testing.T) *promql.Engine { + t.Helper() + + ng := promql.NewEngine(promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 10000, + Timeout: 100 * time.Second, + NoStepSubqueryIntervalFn: func(int64) int64 { return 60 * 1000 }, + EnableAtModifier: true, + EnableNegativeOffset: true, + EnablePerStepStats: true, + }) + t.Cleanup(func() { + require.NoError(t, ng.Close()) + }) + + return ng +} // testMetaStore satisfies the scrape.MetricMetadataStore interface. // It is used to inject specific metadata as part of a test case. @@ -283,6 +292,9 @@ func (m *rulesRetrieverMock) CreateRuleGroups() { } engine := promql.NewEngine(engineOpts) + m.testing.Cleanup(func() { + require.NoError(m.testing, engine.Close()) + }) opts := &rules.ManagerOptions{ QueryFunc: rules.EngineQueryFunc(engine, storage), Appendable: storage, @@ -403,9 +415,10 @@ func TestEndpoints(t *testing.T) { now := time.Now() + ng := testEngine(t) + t.Run("local", func(t *testing.T) { - algr := rulesRetrieverMock{} - algr.testing = t + algr := rulesRetrieverMock{testing: t} algr.CreateAlertingRules() algr.CreateRuleGroups() @@ -417,7 +430,7 @@ func TestEndpoints(t *testing.T) { api := &API{ Queryable: storage, - QueryEngine: testEngine, + QueryEngine: ng, ExemplarQueryable: storage.ExemplarQueryable(), targetRetriever: testTargetRetriever.toFactory(), alertmanagerRetriever: testAlertmanagerRetriever{}.toFactory(), @@ -468,8 +481,7 @@ func TestEndpoints(t *testing.T) { }) require.NoError(t, err) - algr := rulesRetrieverMock{} - algr.testing = t + algr := rulesRetrieverMock{testing: t} algr.CreateAlertingRules() algr.CreateRuleGroups() @@ -481,7 +493,7 @@ func TestEndpoints(t *testing.T) { api := &API{ Queryable: remote, - QueryEngine: testEngine, + QueryEngine: ng, ExemplarQueryable: storage.ExemplarQueryable(), targetRetriever: testTargetRetriever.toFactory(), alertmanagerRetriever: testAlertmanagerRetriever{}.toFactory(), @@ -623,7 +635,7 @@ func TestQueryExemplars(t *testing.T) { api := &API{ Queryable: storage, - QueryEngine: testEngine, + QueryEngine: testEngine(t), ExemplarQueryable: storage.ExemplarQueryable(), } @@ -831,7 +843,7 @@ func TestStats(t *testing.T) { api := &API{ Queryable: storage, - QueryEngine: testEngine, + QueryEngine: testEngine(t), now: func() time.Time { return time.Unix(123, 0) }, diff --git a/web/api/v1/errors_test.go b/web/api/v1/errors_test.go index e76a1a3d35..5ddd47e1b2 100644 --- a/web/api/v1/errors_test.go +++ b/web/api/v1/errors_test.go @@ -86,7 +86,7 @@ func TestApiStatusCodes(t *testing.T) { "error from seriesset": errorTestQueryable{q: errorTestQuerier{s: errorTestSeriesSet{err: tc.err}}}, } { t.Run(fmt.Sprintf("%s/%s", name, k), func(t *testing.T) { - r := createPrometheusAPI(q) + r := createPrometheusAPI(t, q) rec := httptest.NewRecorder() req := httptest.NewRequest(http.MethodGet, "/api/v1/query?query=up", nil) @@ -100,7 +100,9 @@ func TestApiStatusCodes(t *testing.T) { } } -func createPrometheusAPI(q storage.SampleAndChunkQueryable) *route.Router { +func createPrometheusAPI(t *testing.T, q storage.SampleAndChunkQueryable) *route.Router { + t.Helper() + engine := promql.NewEngine(promql.EngineOpts{ Logger: log.NewNopLogger(), Reg: nil, @@ -108,6 +110,9 @@ func createPrometheusAPI(q storage.SampleAndChunkQueryable) *route.Router { MaxSamples: 100, Timeout: 5 * time.Second, }) + t.Cleanup(func() { + require.NoError(t, engine.Close()) + }) api := NewAPI( engine, From 493e196f101630006d6295313834af5b4d998974 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 3 Jul 2024 15:12:40 +0200 Subject: [PATCH 02/83] Fix bad merge Signed-off-by: Arve Knudsen --- promql/engine_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index 45716539c8..70dad315ac 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -3041,7 +3041,6 @@ func TestEngineOptsValidation(t *testing.T) { } } - engine := newTestEngine() func TestInstantQueryWithRangeVectorSelector(t *testing.T) { engine := newTestEngine(t) From fbc9eddfaf9ea5136c1008d997834cb3ac07ba90 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Sun, 14 Jul 2024 13:28:59 +0200 Subject: [PATCH 03/83] Refactor engine creation in tests Signed-off-by: Arve Knudsen --- promql/bench_test.go | 13 +++-------- promql/engine_test.go | 47 +++++++++------------------------------ promql/functions_test.go | 6 ++--- promql/promql_test.go | 5 +---- promql/promqltest/test.go | 15 +++++++++---- rules/alerting_test.go | 11 ++------- rules/manager_test.go | 35 ++++++----------------------- rules/recording_test.go | 5 +---- web/api/v1/api_test.go | 14 ++---------- web/api/v1/errors_test.go | 6 ++--- 10 files changed, 41 insertions(+), 116 deletions(-) diff --git a/promql/bench_test.go b/promql/bench_test.go index 2f244be1d4..bc953464e4 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -21,12 +21,11 @@ import ( "testing" "time" - "github.com/stretchr/testify/require" - "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/teststorage" @@ -273,10 +272,7 @@ func BenchmarkRangeQuery(b *testing.B) { MaxSamples: 50000000, Timeout: 100 * time.Second, } - engine := promql.NewEngine(opts) - b.Cleanup(func() { - require.NoError(b, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(b, opts) const interval = 10000 // 10s interval. // A day of data plus 10k steps. @@ -367,10 +363,7 @@ func BenchmarkNativeHistograms(b *testing.B) { for _, tc := range cases { b.Run(tc.name, func(b *testing.B) { - ng := promql.NewEngine(opts) - b.Cleanup(func() { - require.NoError(b, ng.Close()) - }) + ng := promqltest.NewTestEngineWithOpts(b, opts) for i := 0; i < b.N; i++ { qry, err := ng.NewRangeQuery(context.Background(), testStorage, nil, tc.query, start, end, step) if err != nil { diff --git a/promql/engine_test.go b/promql/engine_test.go index f166b6d13a..d58d23d45c 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -64,11 +64,8 @@ func TestQueryConcurrency(t *testing.T) { Timeout: 100 * time.Second, ActiveQueryTracker: queryTracker, } + engine := promqltest.NewTestEngineWithOpts(t, opts) - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) ctx, cancelCtx := context.WithCancel(context.Background()) t.Cleanup(cancelCtx) @@ -162,10 +159,7 @@ func TestQueryTimeout(t *testing.T) { MaxSamples: 10, Timeout: 5 * time.Millisecond, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() @@ -190,7 +184,7 @@ func TestQueryCancel(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) + engine := promqltest.NewTestEngineWithOpts(t, opts) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() @@ -264,10 +258,7 @@ func TestQueryError(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) errStorage := promql.ErrStorage{errors.New("storage error")} queryable := storage.QueryableFunc(func(mint, maxt int64) (storage.Querier, error) { return &errQuerier{err: errStorage}, nil @@ -601,10 +592,7 @@ func TestSelectHintsSetCorrectly(t *testing.T) { }, } { t.Run(tc.query, func(t *testing.T) { - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) hintsRecorder := &noopHintRecordingQueryable{} var ( @@ -635,10 +623,7 @@ func TestEngineShutdown(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) ctx, cancelCtx := context.WithCancel(context.Background()) block := make(chan struct{}) @@ -2055,10 +2040,7 @@ func TestQueryLogger_basic(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) queryExec := func() { ctx, cancelCtx := context.WithCancel(context.Background()) @@ -2109,10 +2091,7 @@ func TestQueryLogger_fields(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) f1 := NewFakeQueryLogger() engine.SetQueryLogger(f1) @@ -2141,10 +2120,7 @@ func TestQueryLogger_error(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) f1 := NewFakeQueryLogger() engine.SetQueryLogger(f1) @@ -3027,10 +3003,7 @@ func TestEngineOptsValidation(t *testing.T) { } for _, c := range cases { - eng := promql.NewEngine(c.opts) - t.Cleanup(func() { - require.NoError(t, eng.Close()) - }) + eng := promqltest.NewTestEngineWithOpts(t, c.opts) _, err1 := eng.NewInstantQuery(context.Background(), nil, nil, c.query, time.Unix(10, 0)) _, err2 := eng.NewRangeQuery(context.Background(), nil, nil, c.query, time.Unix(0, 0), time.Unix(10, 0), time.Second) if c.fail { diff --git a/promql/functions_test.go b/promql/functions_test.go index ce62d316b5..9ee0ba51dc 100644 --- a/promql/functions_test.go +++ b/promql/functions_test.go @@ -24,6 +24,7 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/util/teststorage" ) @@ -39,10 +40,7 @@ func TestDeriv(t *testing.T) { MaxSamples: 10000, Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) a := storage.Appender(context.Background()) diff --git a/promql/promql_test.go b/promql/promql_test.go index 2a5834681d..345ecab5ed 100644 --- a/promql/promql_test.go +++ b/promql/promql_test.go @@ -48,10 +48,7 @@ func TestConcurrentRangeQueries(t *testing.T) { } // Enable experimental functions testing parser.EnableExperimentalFunctions = true - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) const interval = 10000 // 10s interval. // A day of data plus 10k steps. diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 1fda9985a2..8c7a346c2f 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -74,8 +74,9 @@ func LoadedStorage(t testutil.T, input string) *teststorage.TestStorage { return test.storage } -func NewTestEngine(t *testing.T, enablePerStepStats bool, lookbackDelta time.Duration, maxSamples int) *promql.Engine { - ng := promql.NewEngine(promql.EngineOpts{ +// NewTestEngine creates a promql.Engine with enablePerStepStats, lookbackDelta and maxSamples, and returns it. +func NewTestEngine(tb testing.TB, enablePerStepStats bool, lookbackDelta time.Duration, maxSamples int) *promql.Engine { + return NewTestEngineWithOpts(tb, promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: maxSamples, @@ -86,8 +87,14 @@ func NewTestEngine(t *testing.T, enablePerStepStats bool, lookbackDelta time.Dur EnablePerStepStats: enablePerStepStats, LookbackDelta: lookbackDelta, }) - t.Cleanup(func() { - require.NoError(t, ng.Close()) +} + +// NewTestEngineWithOpts creates a promql.Engine with opts and returns it. +func NewTestEngineWithOpts(tb testing.TB, opts promql.EngineOpts) *promql.Engine { + tb.Helper() + ng := promql.NewEngine(opts) + tb.Cleanup(func() { + require.NoError(tb, ng.Close()) }) return ng } diff --git a/rules/alerting_test.go b/rules/alerting_test.go index 2ff641b4b2..81c065eaa6 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -38,7 +38,7 @@ import ( func testEngine(tb testing.TB) *promql.Engine { tb.Helper() - e := promql.NewEngine(promql.EngineOpts{ + return promqltest.NewTestEngineWithOpts(tb, promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10000, @@ -48,10 +48,6 @@ func testEngine(tb testing.TB) *promql.Engine { EnableNegativeOffset: true, EnablePerStepStats: true, }) - tb.Cleanup(func() { - require.NoError(tb, e.Close()) - }) - return e } func TestAlertingRuleState(t *testing.T) { @@ -595,10 +591,7 @@ func TestAlertingRuleDuplicate(t *testing.T) { Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() diff --git a/rules/manager_test.go b/rules/manager_test.go index 854048281a..ea65a943dd 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -542,10 +542,7 @@ func TestStaleness(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(engineOpts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, engineOpts) opts := &ManagerOptions{ QueryFunc: EngineQueryFunc(engine, st), Appendable: st, @@ -779,10 +776,7 @@ func TestUpdate(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) ruleManager := NewManager(&ManagerOptions{ Appendable: st, Queryable: st, @@ -920,10 +914,7 @@ func TestNotify(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(engineOpts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, engineOpts) var lastNotified []*Alert notifyFunc := func(ctx context.Context, expr string, alerts ...*Alert) { lastNotified = alerts @@ -998,10 +989,7 @@ func TestMetricsUpdate(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) ruleManager := NewManager(&ManagerOptions{ Appendable: storage, Queryable: storage, @@ -1075,10 +1063,7 @@ func TestGroupStalenessOnRemoval(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) ruleManager := NewManager(&ManagerOptions{ Appendable: storage, Queryable: storage, @@ -1155,10 +1140,7 @@ func TestMetricsStalenessOnManagerShutdown(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) ruleManager := NewManager(&ManagerOptions{ Appendable: storage, Queryable: storage, @@ -1260,10 +1242,7 @@ func TestRuleHealthUpdates(t *testing.T) { MaxSamples: 10, Timeout: 10 * time.Second, } - engine := promql.NewEngine(engineOpts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, engineOpts) opts := &ManagerOptions{ QueryFunc: EngineQueryFunc(engine, st), Appendable: st, diff --git a/rules/recording_test.go b/rules/recording_test.go index 98640ab0e3..72c0764f9b 100644 --- a/rules/recording_test.go +++ b/rules/recording_test.go @@ -167,10 +167,7 @@ func TestRuleEvalDuplicate(t *testing.T) { Timeout: 10 * time.Second, } - engine := promql.NewEngine(opts) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(t, opts) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 3e94d18a02..61617189d4 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -61,8 +61,7 @@ import ( func testEngine(t *testing.T) *promql.Engine { t.Helper() - - ng := promql.NewEngine(promql.EngineOpts{ + return promqltest.NewTestEngineWithOpts(t, promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10000, @@ -72,11 +71,6 @@ func testEngine(t *testing.T) *promql.Engine { EnableNegativeOffset: true, EnablePerStepStats: true, }) - t.Cleanup(func() { - require.NoError(t, ng.Close()) - }) - - return ng } // testMetaStore satisfies the scrape.MetricMetadataStore interface. @@ -315,11 +309,7 @@ func (m *rulesRetrieverMock) CreateRuleGroups() { MaxSamples: 10, Timeout: 100 * time.Second, } - - engine := promql.NewEngine(engineOpts) - m.testing.Cleanup(func() { - require.NoError(m.testing, engine.Close()) - }) + engine := promqltest.NewTestEngineWithOpts(m.testing, engineOpts) opts := &rules.ManagerOptions{ QueryFunc: rules.EngineQueryFunc(engine, storage), Appendable: storage, diff --git a/web/api/v1/errors_test.go b/web/api/v1/errors_test.go index d7fee40796..7e1fc09d8a 100644 --- a/web/api/v1/errors_test.go +++ b/web/api/v1/errors_test.go @@ -32,6 +32,7 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/storage" @@ -103,16 +104,13 @@ func TestApiStatusCodes(t *testing.T) { func createPrometheusAPI(t *testing.T, q storage.SampleAndChunkQueryable) *route.Router { t.Helper() - engine := promql.NewEngine(promql.EngineOpts{ + engine := promqltest.NewTestEngineWithOpts(t, promql.EngineOpts{ Logger: log.NewNopLogger(), Reg: nil, ActiveQueryTracker: nil, MaxSamples: 100, Timeout: 5 * time.Second, }) - t.Cleanup(func() { - require.NoError(t, engine.Close()) - }) api := NewAPI( engine, From 6100e756a8ffb8aec255f0964e5471aafbe87de3 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Fri, 26 Jul 2024 09:49:57 +0200 Subject: [PATCH 04/83] Ignore stale histograms for counter reset detection The histogram stats decoder keeps track of the last seen histogram sample in order to properly detect counter resets. We are seeing an issue where a histogram with UnknownResetHint gets treated as a counter reset when it follows a stale histogram sample. I believe that this is incorrect since stale samples should be completely ignored in PromQL. As a result, they should not be stored in the histogram stats iterator and the counter reset detection needs to be done against the last non-stale sample. Signed-off-by: Filip Petkovski --- promql/histogram_stats_iterator.go | 2 - promql/histogram_stats_iterator_test.go | 123 +++++++++++++++--------- tsdb/tsdbutil/histogram.go | 10 +- 3 files changed, 84 insertions(+), 51 deletions(-) diff --git a/promql/histogram_stats_iterator.go b/promql/histogram_stats_iterator.go index dfafea5f8c..0a5f67ae7c 100644 --- a/promql/histogram_stats_iterator.go +++ b/promql/histogram_stats_iterator.go @@ -48,7 +48,6 @@ func (f *histogramStatsIterator) AtHistogram(h *histogram.Histogram) (int64, *hi var t int64 t, f.currentH = f.Iterator.AtHistogram(f.currentH) if value.IsStaleNaN(f.currentH.Sum) { - f.setLastH(f.currentH) h = &histogram.Histogram{Sum: f.currentH.Sum} return t, h } @@ -77,7 +76,6 @@ func (f *histogramStatsIterator) AtFloatHistogram(fh *histogram.FloatHistogram) var t int64 t, f.currentFH = f.Iterator.AtFloatHistogram(f.currentFH) if value.IsStaleNaN(f.currentFH.Sum) { - f.setLastFH(f.currentFH) return t, &histogram.FloatHistogram{Sum: f.currentFH.Sum} } diff --git a/promql/histogram_stats_iterator_test.go b/promql/histogram_stats_iterator_test.go index b71a9d6029..d5c081348c 100644 --- a/promql/histogram_stats_iterator_test.go +++ b/promql/histogram_stats_iterator_test.go @@ -14,62 +14,99 @@ package promql import ( + "fmt" + "math" "testing" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/tsdbutil" ) func TestHistogramStatsDecoding(t *testing.T) { - histograms := []*histogram.Histogram{ - tsdbutil.GenerateTestHistogram(0), - tsdbutil.GenerateTestHistogram(1), - tsdbutil.GenerateTestHistogram(2), - tsdbutil.GenerateTestHistogram(2), - } - histograms[0].CounterResetHint = histogram.NotCounterReset - histograms[1].CounterResetHint = histogram.UnknownCounterReset - histograms[2].CounterResetHint = histogram.CounterReset - histograms[3].CounterResetHint = histogram.UnknownCounterReset - - expectedHints := []histogram.CounterResetHint{ - histogram.NotCounterReset, - histogram.NotCounterReset, - histogram.CounterReset, - histogram.NotCounterReset, + cases := []struct { + name string + histograms []*histogram.Histogram + expectedHints []histogram.CounterResetHint + }{ + { + name: "unknown counter reset triggers detection", + histograms: []*histogram.Histogram{ + tsdbutil.GenerateTestHistogramWithHint(0, histogram.NotCounterReset), + tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), + tsdbutil.GenerateTestHistogramWithHint(2, histogram.CounterReset), + tsdbutil.GenerateTestHistogramWithHint(2, histogram.UnknownCounterReset), + }, + expectedHints: []histogram.CounterResetHint{ + histogram.NotCounterReset, + histogram.NotCounterReset, + histogram.CounterReset, + histogram.NotCounterReset, + }, + }, + { + name: "stale sample before unknown reset hint", + histograms: []*histogram.Histogram{ + tsdbutil.GenerateTestHistogramWithHint(0, histogram.NotCounterReset), + tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), + {Sum: math.Float64frombits(value.StaleNaN)}, + tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), + }, + expectedHints: []histogram.CounterResetHint{ + histogram.NotCounterReset, + histogram.NotCounterReset, + histogram.UnknownCounterReset, + histogram.NotCounterReset, + }, + }, } - t.Run("histogram_stats", func(t *testing.T) { - decodedStats := make([]*histogram.Histogram, 0) - statsIterator := NewHistogramStatsIterator(newHistogramSeries(histograms).Iterator(nil)) - for statsIterator.Next() != chunkenc.ValNone { - _, h := statsIterator.AtHistogram(nil) - decodedStats = append(decodedStats, h) - } - for i := 0; i < len(histograms); i++ { - require.Equal(t, expectedHints[i], decodedStats[i].CounterResetHint) - require.Equal(t, histograms[i].Count, decodedStats[i].Count) - require.Equal(t, histograms[i].Sum, decodedStats[i].Sum) - } - }) - t.Run("float_histogram_stats", func(t *testing.T) { - decodedStats := make([]*histogram.FloatHistogram, 0) - statsIterator := NewHistogramStatsIterator(newHistogramSeries(histograms).Iterator(nil)) - for statsIterator.Next() != chunkenc.ValNone { - _, h := statsIterator.AtFloatHistogram(nil) - decodedStats = append(decodedStats, h) - } - for i := 0; i < len(histograms); i++ { - fh := histograms[i].ToFloat(nil) - require.Equal(t, expectedHints[i], decodedStats[i].CounterResetHint) - require.Equal(t, fh.Count, decodedStats[i].Count) - require.Equal(t, fh.Sum, decodedStats[i].Sum) - } - }) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Run("histogram_stats", func(t *testing.T) { + decodedStats := make([]*histogram.Histogram, 0) + statsIterator := NewHistogramStatsIterator(newHistogramSeries(tc.histograms).Iterator(nil)) + for statsIterator.Next() != chunkenc.ValNone { + _, h := statsIterator.AtHistogram(nil) + decodedStats = append(decodedStats, h) + } + for i := 0; i < len(tc.histograms); i++ { + require.Equal(t, tc.expectedHints[i], decodedStats[i].CounterResetHint, fmt.Sprintf("mismatch in counter reset hint for histogram %d", i)) + h := tc.histograms[i] + if value.IsStaleNaN(h.Sum) { + require.True(t, value.IsStaleNaN(decodedStats[i].Sum)) + require.Equal(t, uint64(0), decodedStats[i].Count) + } else { + require.Equal(t, tc.histograms[i].Count, decodedStats[i].Count) + require.Equal(t, tc.histograms[i].Sum, decodedStats[i].Sum) + } + } + }) + t.Run("float_histogram_stats", func(t *testing.T) { + decodedStats := make([]*histogram.FloatHistogram, 0) + statsIterator := NewHistogramStatsIterator(newHistogramSeries(tc.histograms).Iterator(nil)) + for statsIterator.Next() != chunkenc.ValNone { + _, h := statsIterator.AtFloatHistogram(nil) + decodedStats = append(decodedStats, h) + } + for i := 0; i < len(tc.histograms); i++ { + require.Equal(t, tc.expectedHints[i], decodedStats[i].CounterResetHint) + fh := tc.histograms[i].ToFloat(nil) + if value.IsStaleNaN(fh.Sum) { + require.True(t, value.IsStaleNaN(decodedStats[i].Sum)) + require.Equal(t, float64(0), decodedStats[i].Count) + } else { + require.Equal(t, fh.Count, decodedStats[i].Count) + require.Equal(t, fh.Sum, decodedStats[i].Sum) + } + } + }) + }) + } } type histogramSeries struct { diff --git a/tsdb/tsdbutil/histogram.go b/tsdb/tsdbutil/histogram.go index 3c7349cf72..ce934a638d 100644 --- a/tsdb/tsdbutil/histogram.go +++ b/tsdb/tsdbutil/histogram.go @@ -30,12 +30,10 @@ func GenerateTestHistograms(n int) (r []*histogram.Histogram) { return r } -func GenerateTestHistogramsWithUnknownResetHint(n int) []*histogram.Histogram { - hs := GenerateTestHistograms(n) - for i := range hs { - hs[i].CounterResetHint = histogram.UnknownCounterReset - } - return hs +func GenerateTestHistogramWithHint(n int, hint histogram.CounterResetHint) *histogram.Histogram { + h := GenerateTestHistogram(n) + h.CounterResetHint = hint + return h } // GenerateTestHistogram but it is up to the user to set any known counter reset hint. From 02d9d874a2f02d09201f77c7d122b042a3da4ab9 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Mon, 29 Jul 2024 14:53:32 +0200 Subject: [PATCH 05/83] Add more test cases Signed-off-by: Filip Petkovski --- promql/histogram_stats_iterator_test.go | 33 +++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/promql/histogram_stats_iterator_test.go b/promql/histogram_stats_iterator_test.go index d5c081348c..7a2953d3e2 100644 --- a/promql/histogram_stats_iterator_test.go +++ b/promql/histogram_stats_iterator_test.go @@ -63,6 +63,39 @@ func TestHistogramStatsDecoding(t *testing.T) { histogram.NotCounterReset, }, }, + { + name: "unknown counter reset at the beginning", + histograms: []*histogram.Histogram{ + tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), + }, + expectedHints: []histogram.CounterResetHint{ + histogram.NotCounterReset, + }, + }, + { + name: "detect real counter reset", + histograms: []*histogram.Histogram{ + tsdbutil.GenerateTestHistogramWithHint(2, histogram.UnknownCounterReset), + tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), + }, + expectedHints: []histogram.CounterResetHint{ + histogram.NotCounterReset, + histogram.CounterReset, + }, + }, + { + name: "detect real counter reset after stale NaN", + histograms: []*histogram.Histogram{ + tsdbutil.GenerateTestHistogramWithHint(2, histogram.UnknownCounterReset), + {Sum: math.Float64frombits(value.StaleNaN)}, + tsdbutil.GenerateTestHistogramWithHint(1, histogram.UnknownCounterReset), + }, + expectedHints: []histogram.CounterResetHint{ + histogram.NotCounterReset, + histogram.UnknownCounterReset, + histogram.CounterReset, + }, + }, } for _, tc := range cases { From 8f9751b00ddef4b06aa91b7213442726076c193f Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Wed, 31 Jul 2024 11:17:09 +0200 Subject: [PATCH 06/83] Use CopyTo when resetting histogram in stats iterator The histogram stats iterator does not fully clear the histogram object and is not resilient to new fields being added to the histogram type. To resolve the issue, the commit uses the CopyTo methods which should be future proof to new fields being added. Signed-off-by: Filip Petkovski --- promql/histogram_stats_iterator.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/promql/histogram_stats_iterator.go b/promql/histogram_stats_iterator.go index dfafea5f8c..c1b7746ecf 100644 --- a/promql/histogram_stats_iterator.go +++ b/promql/histogram_stats_iterator.go @@ -63,9 +63,13 @@ func (f *histogramStatsIterator) AtHistogram(h *histogram.Histogram) (int64, *hi return t, h } - h.CounterResetHint = f.getResetHint(f.currentH) - h.Count = f.currentH.Count - h.Sum = f.currentH.Sum + returnValue := histogram.Histogram{ + CounterResetHint: f.getResetHint(f.currentH), + Count: f.currentH.Count, + Sum: f.currentH.Sum, + } + returnValue.CopyTo(h) + f.setLastH(f.currentH) return t, h } @@ -91,9 +95,13 @@ func (f *histogramStatsIterator) AtFloatHistogram(fh *histogram.FloatHistogram) return t, fh } - fh.CounterResetHint = f.getFloatResetHint(f.currentFH.CounterResetHint) - fh.Count = f.currentFH.Count - fh.Sum = f.currentFH.Sum + returnValue := histogram.FloatHistogram{ + CounterResetHint: f.getFloatResetHint(f.currentFH.CounterResetHint), + Count: f.currentFH.Count, + Sum: f.currentFH.Sum, + } + returnValue.CopyTo(fh) + f.setLastFH(f.currentFH) return t, fh } From 909785b97fd4b48317739377283c699d81ffe5e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Tue, 6 Aug 2024 07:46:26 +0200 Subject: [PATCH 07/83] Fix histogram pool poisoning bu chunkenc.Iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chunkenc.Iterator.AtFloatHistogram may do a shallow copy if it receives nil as input pointer. This can in turn share the span slice with multiple histograms in the matrixSelectorHPool, leading to unexpected errors. Signed-off-by: György Krajcsovits --- promql/engine.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/promql/engine.go b/promql/engine.go index 25e67db633..621c2116e0 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2356,6 +2356,11 @@ loop: } else { histograms = append(histograms, HPoint{H: &histogram.FloatHistogram{}}) } + if histograms[n].H == nil { + // Initialize to non zero to AtFloatHistogram does a copy for sure. + // Not an issue in the loop above since that uses an intermediate buffer. + histograms[n].H = &histogram.FloatHistogram{} + } histograms[n].T, histograms[n].H = it.AtFloatHistogram(histograms[n].H) if value.IsStaleNaN(histograms[n].H.Sum) { histograms = histograms[:n] From 1fb0ff7e4516d5993e84c2bee0f4e303b7f0c8c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Tue, 6 Aug 2024 20:48:16 +0200 Subject: [PATCH 08/83] Add unit test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- promql/engine_test.go | 59 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/promql/engine_test.go b/promql/engine_test.go index 523c0613df..9ed3dd939f 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -3798,3 +3798,62 @@ func makeInt64Pointer(val int64) *int64 { *valp = val return valp } + +func TestHistogramCopyFromIteratorRegression(t *testing.T) { + // Loading the following histograms creates two chunks because there's a + // counter reset. Not only the counter is lower in the last histogram + // but also there's missing buckets. + // This in turns means that chunk iterators will have different spans. + load := `load 1m +histogram {{sum:4 count:4 buckets:[2 2]}} {{sum:6 count:6 buckets:[3 3]}} {{sum:1 count:1 buckets:[1]}} +` + storage := promqltest.LoadedStorage(t, load) + t.Cleanup(func() { storage.Close() }) + engine := promqltest.NewTestEngine(false, 0, promqltest.DefaultMaxSamplesPerQuery) + + verify := func(t *testing.T, qry promql.Query, expected []histogram.FloatHistogram) { + res := qry.Exec(context.Background()) + require.NoError(t, res.Err) + + m, ok := res.Value.(promql.Matrix) + require.True(t, ok) + + require.Len(t, m, 1) + series := m[0] + + require.Empty(t, series.Floats) + require.Len(t, series.Histograms, len(expected)) + for i, e := range expected { + series.Histograms[i].H.CounterResetHint = histogram.UnknownCounterReset // Don't care. + require.Equal(t, &e, series.Histograms[i].H) + } + } + + qry, err := engine.NewRangeQuery(context.Background(), storage, nil, "increase(histogram[60s])", time.Unix(0, 0), time.Unix(0, 0).Add(1*time.Minute), time.Minute) + require.NoError(t, err) + verify(t, qry, []histogram.FloatHistogram{ + { + Count: 2, + Sum: 2, // Increase from 4 to 6 is 2. + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, // Two buckets changed between the first and second histogram. + PositiveBuckets: []float64{1, 1}, // Increase from 2 to 3 is 1 in both buckets. + }, + }) + + qry, err = engine.NewInstantQuery(context.Background(), storage, nil, "histogram[60s]", time.Unix(0, 0).Add(2*time.Minute)) + require.NoError(t, err) + verify(t, qry, []histogram.FloatHistogram{ + { + Count: 6, + Sum: 6, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, + PositiveBuckets: []float64{3, 3}, + }, + { + Count: 1, + Sum: 1, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []float64{1}, + }, + }) +} From 1d7fe4be5c581265a5c102f3a4fbdc26af3fd357 Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Wed, 7 Aug 2024 20:15:46 +0200 Subject: [PATCH 09/83] Update promql/engine.go Signed-off-by: George Krajcsovits --- promql/engine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/promql/engine.go b/promql/engine.go index 621c2116e0..f6b79f3a46 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2357,7 +2357,7 @@ loop: histograms = append(histograms, HPoint{H: &histogram.FloatHistogram{}}) } if histograms[n].H == nil { - // Initialize to non zero to AtFloatHistogram does a copy for sure. + // Make sure to pass non zero H to AtFloatHistogram so that it does a deep-copy. // Not an issue in the loop above since that uses an intermediate buffer. histograms[n].H = &histogram.FloatHistogram{} } From 8978f3ef71fb8d92d9b430cd3a6591b6ca231c7f Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 9 Aug 2024 12:07:28 +0100 Subject: [PATCH 10/83] Cut release 2.54.0 Signed-off-by: Bryan Boreham --- CHANGELOG.md | 8 ++------ VERSION | 2 +- web/ui/module/codemirror-promql/package.json | 4 ++-- web/ui/module/lezer-promql/package.json | 2 +- web/ui/package-lock.json | 14 +++++++------- web/ui/package.json | 2 +- web/ui/react-app/package.json | 4 ++-- 7 files changed, 16 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e338d9c5c2..8857495048 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,6 @@ # Changelog -## 2.54.0-rc.1 / 2024-08-05 - -* [BUGFIX] TSDB: Exclude OOO chunks mapped after compaction starts (introduced by #14396). #14584 - -## 2.54.0-rc.0 / 2024-07-19 +## 2.54.0 / 2024-08-09 Release 2.54 brings a release candidate of a major new version of [Remote Write: 2.0](https://prometheus.io/docs/specs/remote_write_spec_2_0/). This is experimental at this time and may still change. @@ -22,7 +18,7 @@ Remote-write v2 is enabled by default, but can be disabled via feature-flag `web * [ENHANCEMENT] TSDB: Optimise seek within index. #14393 * [ENHANCEMENT] TSDB: Optimise deletion of stale series. #14307 * [ENHANCEMENT] TSDB: Reduce locking to optimise adding and removing series. #13286,#14286 -* [ENHANCEMENT] TSDB: Small optimisation: streamline special handling for out-of-order data. #14396 +* [ENHANCEMENT] TSDB: Small optimisation: streamline special handling for out-of-order data. #14396,#14584 * [ENHANCEMENT] Regexps: Optimize patterns with multiple prefixes. #13843,#14368 * [ENHANCEMENT] Regexps: Optimize patterns containing multiple literal strings. #14173 * [ENHANCEMENT] AWS SD: expose Primary IPv6 addresses as __meta_ec2_primary_ipv6_addresses. #14156 diff --git a/VERSION b/VERSION index 149ab47321..99aed793ad 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.54.0-rc.1 +2.54.0 diff --git a/web/ui/module/codemirror-promql/package.json b/web/ui/module/codemirror-promql/package.json index 26823cdfdc..6307f12f31 100644 --- a/web/ui/module/codemirror-promql/package.json +++ b/web/ui/module/codemirror-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/codemirror-promql", - "version": "0.54.0-rc.1", + "version": "0.54.0", "description": "a CodeMirror mode for the PromQL language", "types": "dist/esm/index.d.ts", "module": "dist/esm/index.js", @@ -29,7 +29,7 @@ }, "homepage": "https://github.com/prometheus/prometheus/blob/main/web/ui/module/codemirror-promql/README.md", "dependencies": { - "@prometheus-io/lezer-promql": "0.54.0-rc.1", + "@prometheus-io/lezer-promql": "0.54.0", "lru-cache": "^7.18.3" }, "devDependencies": { diff --git a/web/ui/module/lezer-promql/package.json b/web/ui/module/lezer-promql/package.json index 774c959679..14d263241e 100644 --- a/web/ui/module/lezer-promql/package.json +++ b/web/ui/module/lezer-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/lezer-promql", - "version": "0.54.0-rc.1", + "version": "0.54.0", "description": "lezer-based PromQL grammar", "main": "dist/index.cjs", "type": "module", diff --git a/web/ui/package-lock.json b/web/ui/package-lock.json index 3243fe6aaa..0f5db9f6da 100644 --- a/web/ui/package-lock.json +++ b/web/ui/package-lock.json @@ -1,12 +1,12 @@ { "name": "prometheus-io", - "version": "0.54.0-rc.1", + "version": "0.54.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "prometheus-io", - "version": "0.54.0-rc.1", + "version": "0.54.0", "workspaces": [ "react-app", "module/*" @@ -30,10 +30,10 @@ }, "module/codemirror-promql": { "name": "@prometheus-io/codemirror-promql", - "version": "0.54.0-rc.1", + "version": "0.54.0", "license": "Apache-2.0", "dependencies": { - "@prometheus-io/lezer-promql": "0.54.0-rc.1", + "@prometheus-io/lezer-promql": "0.54.0", "lru-cache": "^7.18.3" }, "devDependencies": { @@ -69,7 +69,7 @@ }, "module/lezer-promql": { "name": "@prometheus-io/lezer-promql", - "version": "0.54.0-rc.1", + "version": "0.54.0", "license": "Apache-2.0", "devDependencies": { "@lezer/generator": "^1.7.0", @@ -19332,7 +19332,7 @@ }, "react-app": { "name": "@prometheus-io/app", - "version": "0.54.0-rc.1", + "version": "0.54.0", "dependencies": { "@codemirror/autocomplete": "^6.17.0", "@codemirror/commands": "^6.6.0", @@ -19350,7 +19350,7 @@ "@lezer/lr": "^1.4.1", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.54.0-rc.1", + "@prometheus-io/codemirror-promql": "0.54.0", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6", diff --git a/web/ui/package.json b/web/ui/package.json index fe1e77ef53..01dd71f34a 100644 --- a/web/ui/package.json +++ b/web/ui/package.json @@ -28,5 +28,5 @@ "ts-jest": "^29.2.2", "typescript": "^4.9.5" }, - "version": "0.54.0-rc.1" + "version": "0.54.0" } diff --git a/web/ui/react-app/package.json b/web/ui/react-app/package.json index b653547305..01620531ae 100644 --- a/web/ui/react-app/package.json +++ b/web/ui/react-app/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/app", - "version": "0.54.0-rc.1", + "version": "0.54.0", "private": true, "dependencies": { "@codemirror/autocomplete": "^6.17.0", @@ -19,7 +19,7 @@ "@lezer/lr": "^1.4.1", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.54.0-rc.1", + "@prometheus-io/codemirror-promql": "0.54.0", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6", From 7f0254da299c60f776ffa0c5b8d24d4419d88620 Mon Sep 17 00:00:00 2001 From: Suraj Nath <9503187+electron0zero@users.noreply.github.com> Date: Fri, 9 Aug 2024 22:50:30 +0530 Subject: [PATCH 11/83] chore: migrate stale check to use stale github action Signed-off-by: Suraj Nath <9503187+electron0zero@users.noreply.github.com> --- .github/stale.yml | 56 ------------------------------------- .github/workflows/stale.yml | 32 +++++++++++++++++++++ 2 files changed, 32 insertions(+), 56 deletions(-) delete mode 100644 .github/stale.yml create mode 100644 .github/workflows/stale.yml diff --git a/.github/stale.yml b/.github/stale.yml deleted file mode 100644 index 66a72af533..0000000000 --- a/.github/stale.yml +++ /dev/null @@ -1,56 +0,0 @@ -# Configuration for probot-stale - https://github.com/probot/stale - -# Number of days of inactivity before an Issue or Pull Request becomes stale -daysUntilStale: 60 - -# Number of days of inactivity before an Issue or Pull Request with the stale label is closed. -# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. -daysUntilClose: false - -# Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled) -onlyLabels: [] - -# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable -exemptLabels: - - keepalive - -# Set to true to ignore issues in a project (defaults to false) -exemptProjects: false - -# Set to true to ignore issues in a milestone (defaults to false) -exemptMilestones: false - -# Set to true to ignore issues with an assignee (defaults to false) -exemptAssignees: false - -# Label to use when marking as stale -staleLabel: stale - -# Comment to post when marking as stale. Set to `false` to disable -markComment: false - -# Comment to post when removing the stale label. -# unmarkComment: > -# Your comment here. - -# Comment to post when closing a stale Issue or Pull Request. -# closeComment: > -# Your comment here. - -# Limit the number of actions per hour, from 1-30. Default is 30 -limitPerRun: 30 - -# Limit to only `issues` or `pulls` -only: pulls - -# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': -# pulls: -# daysUntilStale: 30 -# markComment: > -# This pull request has been automatically marked as stale because it has not had -# recent activity. It will be closed if no further activity occurs. Thank you -# for your contributions. - -# issues: -# exemptLabels: -# - confirmed diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 0000000000..f817644489 --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,32 @@ +name: Stale Check +on: + workflow_dispatch: {} + schedule: + - cron: '0 0 * * *' +permissions: + issues: write + pull-requests: write +jobs: + stale: + # only run on repos in prometheus org, and skip this workflow in forks. + if: github.repository_owner == 'prometheus' + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v9 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + # opt out of defaults to avoid marking issues as stale and closing them + # https://github.com/actions/stale#days-before-close + # https://github.com/actions/stale#days-before-stale + days-before-stale: -1 + days-before-close: -1 + # Setting it to empty string to skip comments. + # https://github.com/actions/stale#stale-pr-message + # https://github.com/actions/stale#stale-issue-message + stale-pr-message: '' + stale-issue-message: '' + operations-per-run: 30 + # override days-before-stale, for only marking the pull requests as stale + days-before-pr-stale: 60 + stale-pr-label: stale + exempt-pr-labels: keepalive From 5dcaaee72a629388d7859cfe925ffe26c245cc76 Mon Sep 17 00:00:00 2001 From: Suraj Nath <9503187+electron0zero@users.noreply.github.com> Date: Fri, 9 Aug 2024 22:59:27 +0530 Subject: [PATCH 12/83] add stale check in list of files to sync Signed-off-by: Suraj Nath <9503187+electron0zero@users.noreply.github.com> --- scripts/sync_repo_files.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sync_repo_files.sh b/scripts/sync_repo_files.sh index 6459fb1e7a..837811a56d 100755 --- a/scripts/sync_repo_files.sh +++ b/scripts/sync_repo_files.sh @@ -37,7 +37,7 @@ if [ -z "${GITHUB_TOKEN}" ]; then fi # List of files that should be synced. -SYNC_FILES="CODE_OF_CONDUCT.md LICENSE Makefile.common SECURITY.md .yamllint scripts/golangci-lint.yml .github/workflows/scorecards.yml .github/workflows/container_description.yml" +SYNC_FILES="CODE_OF_CONDUCT.md LICENSE Makefile.common SECURITY.md .yamllint scripts/golangci-lint.yml .github/workflows/scorecards.yml .github/workflows/container_description.yml .github/workflows/stale.yml" # Go to the root of the repo cd "$(git rev-parse --show-cdup)" || exit 1 From a3b36c12256de33dee5618fcc2de3ba53f24b8f6 Mon Sep 17 00:00:00 2001 From: Suraj Nath <9503187+electron0zero@users.noreply.github.com> Date: Fri, 9 Aug 2024 23:06:11 +0530 Subject: [PATCH 13/83] pin to SHA of github action Signed-off-by: Suraj Nath <9503187+electron0zero@users.noreply.github.com> --- .github/workflows/stale.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index f817644489..0a57810a0b 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -12,7 +12,8 @@ jobs: if: github.repository_owner == 'prometheus' runs-on: ubuntu-latest steps: - - uses: actions/stale@v9 + # pin to SHA of v9.0.0: https://github.com/actions/stale/releases/tag/v9.0.0 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e with: repo-token: ${{ secrets.GITHUB_TOKEN }} # opt out of defaults to avoid marking issues as stale and closing them From 94ad489328623fd9a4df33e498138b48d5363c0c Mon Sep 17 00:00:00 2001 From: suntala Date: Sun, 11 Aug 2024 21:07:54 +0200 Subject: [PATCH 14/83] Fall back to comparing by label set when sorting by label Co-authored-by: Aleks Fazlieva Signed-off-by: suntala --- promql/functions.go | 10 +++++----- promql/promqltest/testdata/functions.test | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/promql/functions.go b/promql/functions.go index 018023bf02..353d4155a8 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -406,10 +406,10 @@ func funcSortDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel // === sort_by_label(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - // In case the labels are the same, NaN should sort to the bottom, so take - // ascending sort with NaN first and reverse it. - var anno annotations.Annotations - vals[0], anno = funcSort(vals, args, enh) + slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { + return labels.Compare(a.Metric, b.Metric) + }) + labels := stringSliceFromArgs(args[1:]) slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { // Iterate over each given label @@ -431,7 +431,7 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode return 0 }) - return vals[0].(Vector), anno + return vals[0].(Vector), nil } // === sort_by_label_desc(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === diff --git a/promql/promqltest/testdata/functions.test b/promql/promqltest/testdata/functions.test index b89d44fced..177afca353 100644 --- a/promql/promqltest/testdata/functions.test +++ b/promql/promqltest/testdata/functions.test @@ -523,16 +523,16 @@ load 5m node_uname_info{job="node_exporter", instance="4m1000", release="1.111.3"} 0+10x10 eval_ordered instant at 50m sort_by_label(http_requests, "instance") - http_requests{group="production", instance="0", job="api-server"} 100 http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="production", instance="0", job="app-server"} 500 http_requests{group="canary", instance="0", job="app-server"} 700 - http_requests{group="production", instance="1", job="api-server"} 200 + http_requests{group="production", instance="0", job="api-server"} 100 + http_requests{group="production", instance="0", job="app-server"} 500 http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="production", instance="1", job="app-server"} 600 http_requests{group="canary", instance="1", job="app-server"} 800 - http_requests{group="production", instance="2", job="api-server"} 100 + http_requests{group="production", instance="1", job="api-server"} 200 + http_requests{group="production", instance="1", job="app-server"} 600 http_requests{group="canary", instance="2", job="api-server"} NaN + http_requests{group="production", instance="2", job="api-server"} 100 eval_ordered instant at 50m sort_by_label(http_requests, "instance", "group") http_requests{group="canary", instance="0", job="api-server"} 300 From fd2f44af7fa48c5b57df51883b6f69c2e73a4809 Mon Sep 17 00:00:00 2001 From: suntala Date: Sun, 11 Aug 2024 21:24:09 +0200 Subject: [PATCH 15/83] Fall back to comparing by label set when sorting by label desc Co-authored-by: Aleks Fazlieva Signed-off-by: suntala --- promql/functions.go | 10 +++++----- promql/promqltest/testdata/functions.test | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/promql/functions.go b/promql/functions.go index 353d4155a8..e9bfe45f4a 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -436,10 +436,10 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode // === sort_by_label_desc(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - // In case the labels are the same, NaN should sort to the bottom, so take - // ascending sort with NaN first and reverse it. - var anno annotations.Annotations - vals[0], anno = funcSortDesc(vals, args, enh) + slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { + return labels.Compare(b.Metric, a.Metric) + }) + labels := stringSliceFromArgs(args[1:]) slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { // Iterate over each given label @@ -461,7 +461,7 @@ func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *Eval return 0 }) - return vals[0].(Vector), anno + return vals[0].(Vector), nil } // === clamp(Vector parser.ValueTypeVector, min, max Scalar) (Vector, Annotations) === diff --git a/promql/promqltest/testdata/functions.test b/promql/promqltest/testdata/functions.test index 177afca353..6e2b3630bc 100644 --- a/promql/promqltest/testdata/functions.test +++ b/promql/promqltest/testdata/functions.test @@ -585,14 +585,14 @@ eval_ordered instant at 50m sort_by_label(http_requests, "job", "instance", "gro eval_ordered instant at 50m sort_by_label_desc(http_requests, "instance") http_requests{group="production", instance="2", job="api-server"} 100 http_requests{group="canary", instance="2", job="api-server"} NaN - http_requests{group="canary", instance="1", job="app-server"} 800 http_requests{group="production", instance="1", job="app-server"} 600 - http_requests{group="canary", instance="1", job="api-server"} 400 http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="canary", instance="1", job="app-server"} 800 + http_requests{group="canary", instance="1", job="api-server"} 400 http_requests{group="production", instance="0", job="app-server"} 500 - http_requests{group="canary", instance="0", job="api-server"} 300 http_requests{group="production", instance="0", job="api-server"} 100 + http_requests{group="canary", instance="0", job="app-server"} 700 + http_requests{group="canary", instance="0", job="api-server"} 300 eval_ordered instant at 50m sort_by_label_desc(http_requests, "instance", "group") http_requests{group="production", instance="2", job="api-server"} 100 From 532904a1d6c34ec2e7064bc0ceb959a6b63acaf7 Mon Sep 17 00:00:00 2001 From: suntala Date: Sun, 11 Aug 2024 21:34:39 +0200 Subject: [PATCH 16/83] Document changes to sort by label Co-authored-by: Aleks Fazlieva Signed-off-by: suntala --- docs/querying/functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/querying/functions.md b/docs/querying/functions.md index ee81328b5e..951b90a68e 100644 --- a/docs/querying/functions.md +++ b/docs/querying/functions.md @@ -619,7 +619,7 @@ Like `sort`, `sort_desc` only affects the results of instant queries, as range q **This function has to be enabled via the [feature flag](../feature_flags.md) `--enable-feature=promql-experimental-functions`.** -`sort_by_label(v instant-vector, label string, ...)` returns vector elements sorted by their label values and sample value in case of label values being equal, in ascending order. +`sort_by_label(v instant-vector, label string, ...)` returns vector elements sorted by the values of the given labels in ascending order. In case these label values are equal, elements are sorted by their full label sets. Please note that the sort by label functions only affect the results of instant queries, as range query results always have a fixed output ordering. From 74c1af3fd57cdd25913ec5b59cd0492d7e2a9f84 Mon Sep 17 00:00:00 2001 From: Suraj Nath <9503187+electron0zero@users.noreply.github.com> Date: Mon, 12 Aug 2024 23:36:48 +0530 Subject: [PATCH 17/83] address review comments Signed-off-by: Suraj Nath <9503187+electron0zero@users.noreply.github.com> --- .github/workflows/stale.yml | 8 +++----- scripts/sync_repo_files.sh | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 0a57810a0b..d71bcbc9d8 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -2,18 +2,16 @@ name: Stale Check on: workflow_dispatch: {} schedule: - - cron: '0 0 * * *' + - cron: '16 22 * * *' permissions: issues: write pull-requests: write jobs: stale: - # only run on repos in prometheus org, and skip this workflow in forks. - if: github.repository_owner == 'prometheus' + if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. runs-on: ubuntu-latest steps: - # pin to SHA of v9.0.0: https://github.com/actions/stale/releases/tag/v9.0.0 - - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0 with: repo-token: ${{ secrets.GITHUB_TOKEN }} # opt out of defaults to avoid marking issues as stale and closing them diff --git a/scripts/sync_repo_files.sh b/scripts/sync_repo_files.sh index 837811a56d..1029336298 100755 --- a/scripts/sync_repo_files.sh +++ b/scripts/sync_repo_files.sh @@ -37,7 +37,7 @@ if [ -z "${GITHUB_TOKEN}" ]; then fi # List of files that should be synced. -SYNC_FILES="CODE_OF_CONDUCT.md LICENSE Makefile.common SECURITY.md .yamllint scripts/golangci-lint.yml .github/workflows/scorecards.yml .github/workflows/container_description.yml .github/workflows/stale.yml" +SYNC_FILES="CODE_OF_CONDUCT.md LICENSE Makefile.common SECURITY.md .yamllint scripts/golangci-lint.yml .github/workflows/scorecards.yml .github/workflows/container_description.yml .github/workflows/stale.yml" # Go to the root of the repo cd "$(git rev-parse --show-cdup)" || exit 1 From 6548421f61d668ea0399271b0dbb6fc907bdcf13 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 16 Aug 2024 13:37:05 +1000 Subject: [PATCH 18/83] promqltest: improve error messages when unexpected type is returned, and fix inverted expected and actual values in error message when scalar value does not match expected Signed-off-by: Charles Korn --- promql/promqltest/test.go | 24 ++++++++++++++++------ promql/promqltest/test_test.go | 37 ++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 8b1ec381ad..aa98f12460 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -650,8 +650,9 @@ type evalCmd struct { expectedFailMessage string expectedFailRegexp *regexp.Regexp - metrics map[uint64]labels.Labels - expected map[uint64]entry + metrics map[uint64]labels.Labels + expectScalar bool + expected map[uint64]entry } type entry struct { @@ -695,12 +696,15 @@ func (ev *evalCmd) String() string { // expect adds a sequence of values to the set of expected // results for the query. func (ev *evalCmd) expect(pos int, vals ...parser.SequenceValue) { + ev.expectScalar = true ev.expected[0] = entry{pos: pos, vals: vals} } // expectMetric adds a new metric with a sequence of values to the set of expected // results for the query. func (ev *evalCmd) expectMetric(pos int, m labels.Labels, vals ...parser.SequenceValue) { + ev.expectScalar = false + h := m.Hash() ev.metrics[h] = m ev.expected[h] = entry{pos: pos, vals: vals} @@ -714,6 +718,10 @@ func (ev *evalCmd) compareResult(result parser.Value) error { return fmt.Errorf("expected ordered result, but query returned a matrix") } + if ev.expectScalar { + return fmt.Errorf("expected scalar result, but got matrix %s", val.String()) + } + if err := assertMatrixSorted(val); err != nil { return err } @@ -782,6 +790,10 @@ func (ev *evalCmd) compareResult(result parser.Value) error { } case promql.Vector: + if ev.expectScalar { + return fmt.Errorf("expected scalar result, but got vector %s", val.String()) + } + seen := map[uint64]bool{} for pos, v := range val { fp := v.Metric.Hash() @@ -820,15 +832,15 @@ func (ev *evalCmd) compareResult(result parser.Value) error { } case promql.Scalar: - if len(ev.expected) != 1 { - return fmt.Errorf("expected vector result, but got scalar %s", val.String()) + if !ev.expectScalar { + return fmt.Errorf("expected vector or matrix result, but got %s", val.String()) } exp0 := ev.expected[0].vals[0] if exp0.Histogram != nil { - return fmt.Errorf("expected Histogram %v but got scalar %s", exp0.Histogram.TestExpression(), val.String()) + return fmt.Errorf("expected histogram %v but got %s", exp0.Histogram.TestExpression(), val.String()) } if !almost.Equal(exp0.Value, val.V, defaultEpsilon) { - return fmt.Errorf("expected Scalar %v but got %v", val.V, exp0.Value) + return fmt.Errorf("expected scalar %v but got %v", exp0.Value, val.V) } default: diff --git a/promql/promqltest/test_test.go b/promql/promqltest/test_test.go index faffb1dd17..49b43eb126 100644 --- a/promql/promqltest/test_test.go +++ b/promql/promqltest/test_test.go @@ -554,6 +554,43 @@ eval range from 0 to 5m step 5m testmetric `, expectedError: `error in eval testmetric (line 5): expected float value at index 0 for {__name__="testmetric"} to have timestamp 300000, but it had timestamp 0 (result has 1 float point [3 @[0]] and 1 histogram point [{count:0, sum:0} @[300000]])`, }, + "instant query with expected scalar result": { + input: ` + eval instant at 1m 3 + 3 + `, + }, + "instant query with unexpected scalar result": { + input: ` + eval instant at 1m 3 + 2 + `, + expectedError: `error in eval 3 (line 2): expected scalar 2 but got 3`, + }, + "instant query that returns a scalar but expects a vector": { + input: ` + eval instant at 1m 3 + {} 3 + `, + expectedError: `error in eval 3 (line 2): expected vector or matrix result, but got scalar: 3 @[60000]`, + }, + "instant query that returns a vector but expects a scalar": { + input: ` + eval instant at 1m vector(3) + 3 + `, + expectedError: `error in eval vector(3) (line 2): expected scalar result, but got vector {} => 3 @[60000]`, + }, + "range query that returns a matrix but expects a scalar": { + input: ` + eval range from 0 to 1m step 30s vector(3) + 3 + `, + expectedError: `error in eval vector(3) (line 2): expected scalar result, but got matrix {} => +3 @[0] +3 @[30000] +3 @[60000]`, + }, } for name, testCase := range testCases { From 144470c7b011be44ab73e4fb3d7fe228f401a016 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 19 Aug 2024 10:58:35 +0100 Subject: [PATCH 19/83] [BUGFIX] Scraping: allow multiple samples on same series (#14685) So long as they specify timestamps. We don't check that the timestamps are different. Extend test, and use client_golang/prometheus/testutil to simplify metric check. Signed-off-by: Bryan Boreham --- scrape/scrape.go | 2 +- scrape/scrape_test.go | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/scrape/scrape.go b/scrape/scrape.go index 68411a62e0..ccb068b680 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -1631,7 +1631,7 @@ loop: updateMetadata(lset, true) } - if seriesAlreadyScraped { + if seriesAlreadyScraped && parsedTimestamp == nil { err = storage.ErrDuplicateSampleForTimestamp } else { if ctMs := p.CreatedTimestamp(); sl.enableCTZeroIngestion && ctMs != nil { diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index a3fe6ac1a5..c5a0b8b831 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -35,6 +35,7 @@ import ( "github.com/gogo/protobuf/proto" "github.com/google/go-cmp/cmp" "github.com/prometheus/client_golang/prometheus" + prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" dto "github.com/prometheus/client_model/go" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -3627,6 +3628,7 @@ func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { require.Equal(t, 3, total) require.Equal(t, 3, added) require.Equal(t, 1, seriesAdded) + require.Equal(t, 2.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate)) slApp = sl.appender(ctx) total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1\ntest_metric 1\ntest_metric 1\n"), "", time.Time{}) @@ -3635,12 +3637,18 @@ func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { require.Equal(t, 3, total) require.Equal(t, 3, added) require.Equal(t, 0, seriesAdded) + require.Equal(t, 4.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate)) - metric := dto.Metric{} - err = sl.metrics.targetScrapeSampleDuplicate.Write(&metric) + // When different timestamps are supplied, multiple samples are accepted. + slApp = sl.appender(ctx) + total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1 1001\ntest_metric 1 1002\ntest_metric 1 1003\n"), "", time.Time{}) require.NoError(t, err) - value := metric.GetCounter().GetValue() - require.Equal(t, 4.0, value) + require.NoError(t, slApp.Commit()) + require.Equal(t, 3, total) + require.Equal(t, 3, added) + require.Equal(t, 0, seriesAdded) + // Metric is not higher than last time. + require.Equal(t, 4.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate)) } // This tests running a full scrape loop and checking that the scrape option From 89dee48cc81df4091ae1f9c1e402f32b47183440 Mon Sep 17 00:00:00 2001 From: "ouyang1204@gmail.com" Date: Sun, 11 Aug 2024 15:31:11 +0800 Subject: [PATCH 20/83] fix the issue of failing to match the first network when the container is reconnected to a new network Signed-off-by: ouyang1204@gmail.com --- discovery/moby/docker.go | 33 +++-- discovery/moby/docker_test.go | 119 ++++++++++++++++-- .../testdata/dockerprom/containers/json.json | 69 ++++++++++ docs/configuration/configuration.md | 4 +- 4 files changed, 198 insertions(+), 27 deletions(-) diff --git a/discovery/moby/docker.go b/discovery/moby/docker.go index 11445092ee..68f6fe3ccc 100644 --- a/discovery/moby/docker.go +++ b/discovery/moby/docker.go @@ -19,6 +19,7 @@ import ( "net" "net/http" "net/url" + "sort" "strconv" "time" @@ -251,28 +252,26 @@ func (d *DockerDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er } if d.matchFirstNetwork && len(networks) > 1 { - // Match user defined network - if containerNetworkMode.IsUserDefined() { - networkMode := string(containerNetworkMode) - networks = map[string]*network.EndpointSettings{networkMode: networks[networkMode]} - } else { - // Get first network if container network mode has "none" value. - // This case appears under certain condition: - // 1. Container created with network set to "--net=none". - // 2. Disconnect network "none". - // 3. Reconnect network with user defined networks. - var first string - for k, n := range networks { - if n != nil { - first = k - break - } + // Sort networks by name and take first non-nil network. + keys := make([]string, 0, len(networks)) + for k, n := range networks { + if n != nil { + keys = append(keys, k) } - networks = map[string]*network.EndpointSettings{first: networks[first]} + } + if len(keys) > 0 { + sort.Strings(keys) + firstNetworkMode := keys[0] + firstNetwork := networks[firstNetworkMode] + networks = map[string]*network.EndpointSettings{firstNetworkMode: firstNetwork} } } for _, n := range networks { + if n == nil { + continue + } + var added bool for _, p := range c.Ports { diff --git a/discovery/moby/docker_test.go b/discovery/moby/docker_test.go index c108ddf582..398393a15a 100644 --- a/discovery/moby/docker_test.go +++ b/discovery/moby/docker_test.go @@ -60,9 +60,9 @@ host: %s tg := tgs[0] require.NotNil(t, tg) require.NotNil(t, tg.Targets) - require.Len(t, tg.Targets, 6) + require.Len(t, tg.Targets, 8) - for i, lbls := range []model.LabelSet{ + expected := []model.LabelSet{ { "__address__": "172.19.0.2:9100", "__meta_docker_container_id": "c301b928faceb1a18fe379f6bc178727ef920bb30b0f9b8592b32b36255a0eca", @@ -163,7 +163,43 @@ host: %s "__meta_docker_network_scope": "local", "__meta_docker_port_private": "9104", }, - } { + { + "__address__": "172.20.0.3:3306", + "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "__meta_docker_container_label_com_docker_compose_project": "dockersd", + "__meta_docker_container_label_com_docker_compose_service": "mysql", + "__meta_docker_container_label_com_docker_compose_version": "2.2.2", + "__meta_docker_container_name": "/dockersd_multi_networks", + "__meta_docker_container_network_mode": "dockersd_private_none", + "__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601", + "__meta_docker_network_ingress": "false", + "__meta_docker_network_internal": "false", + "__meta_docker_network_ip": "172.20.0.3", + "__meta_docker_network_name": "dockersd_private", + "__meta_docker_network_scope": "local", + "__meta_docker_port_private": "3306", + }, + { + "__address__": "172.20.0.3:33060", + "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "__meta_docker_container_label_com_docker_compose_project": "dockersd", + "__meta_docker_container_label_com_docker_compose_service": "mysql", + "__meta_docker_container_label_com_docker_compose_version": "2.2.2", + "__meta_docker_container_name": "/dockersd_multi_networks", + "__meta_docker_container_network_mode": "dockersd_private_none", + "__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601", + "__meta_docker_network_ingress": "false", + "__meta_docker_network_internal": "false", + "__meta_docker_network_ip": "172.20.0.3", + "__meta_docker_network_name": "dockersd_private", + "__meta_docker_network_scope": "local", + "__meta_docker_port_private": "33060", + }, + } + sortFunc(expected) + sortFunc(tg.Targets) + + for i, lbls := range expected { t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) { require.Equal(t, lbls, tg.Targets[i]) }) @@ -202,13 +238,8 @@ host: %s tg := tgs[0] require.NotNil(t, tg) require.NotNil(t, tg.Targets) - require.Len(t, tg.Targets, 9) + require.Len(t, tg.Targets, 13) - sortFunc := func(labelSets []model.LabelSet) { - sort.Slice(labelSets, func(i, j int) bool { - return labelSets[i]["__address__"] < labelSets[j]["__address__"] - }) - } expected := []model.LabelSet{ { "__address__": "172.19.0.2:9100", @@ -359,6 +390,70 @@ host: %s "__meta_docker_network_scope": "local", "__meta_docker_port_private": "9104", }, + { + "__address__": "172.20.0.3:3306", + "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "__meta_docker_container_label_com_docker_compose_project": "dockersd", + "__meta_docker_container_label_com_docker_compose_service": "mysql", + "__meta_docker_container_label_com_docker_compose_version": "2.2.2", + "__meta_docker_container_name": "/dockersd_multi_networks", + "__meta_docker_container_network_mode": "dockersd_private_none", + "__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601", + "__meta_docker_network_ingress": "false", + "__meta_docker_network_internal": "false", + "__meta_docker_network_ip": "172.20.0.3", + "__meta_docker_network_name": "dockersd_private", + "__meta_docker_network_scope": "local", + "__meta_docker_port_private": "3306", + }, + { + "__address__": "172.20.0.3:33060", + "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "__meta_docker_container_label_com_docker_compose_project": "dockersd", + "__meta_docker_container_label_com_docker_compose_service": "mysql", + "__meta_docker_container_label_com_docker_compose_version": "2.2.2", + "__meta_docker_container_name": "/dockersd_multi_networks", + "__meta_docker_container_network_mode": "dockersd_private_none", + "__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601", + "__meta_docker_network_ingress": "false", + "__meta_docker_network_internal": "false", + "__meta_docker_network_ip": "172.20.0.3", + "__meta_docker_network_name": "dockersd_private", + "__meta_docker_network_scope": "local", + "__meta_docker_port_private": "33060", + }, + { + "__address__": "172.21.0.3:3306", + "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "__meta_docker_container_label_com_docker_compose_project": "dockersd", + "__meta_docker_container_label_com_docker_compose_service": "mysql", + "__meta_docker_container_label_com_docker_compose_version": "2.2.2", + "__meta_docker_container_name": "/dockersd_multi_networks", + "__meta_docker_container_network_mode": "dockersd_private_none", + "__meta_docker_network_id": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8", + "__meta_docker_network_ingress": "false", + "__meta_docker_network_internal": "false", + "__meta_docker_network_ip": "172.21.0.3", + "__meta_docker_network_name": "dockersd_private1", + "__meta_docker_network_scope": "local", + "__meta_docker_port_private": "3306", + }, + { + "__address__": "172.21.0.3:33060", + "__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "__meta_docker_container_label_com_docker_compose_project": "dockersd", + "__meta_docker_container_label_com_docker_compose_service": "mysql", + "__meta_docker_container_label_com_docker_compose_version": "2.2.2", + "__meta_docker_container_name": "/dockersd_multi_networks", + "__meta_docker_container_network_mode": "dockersd_private_none", + "__meta_docker_network_id": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8", + "__meta_docker_network_ingress": "false", + "__meta_docker_network_internal": "false", + "__meta_docker_network_ip": "172.21.0.3", + "__meta_docker_network_name": "dockersd_private1", + "__meta_docker_network_scope": "local", + "__meta_docker_port_private": "33060", + }, } sortFunc(expected) @@ -370,3 +465,9 @@ host: %s }) } } + +func sortFunc(labelSets []model.LabelSet) { + sort.Slice(labelSets, func(i, j int) bool { + return labelSets[i]["__address__"] < labelSets[j]["__address__"] + }) +} diff --git a/discovery/moby/testdata/dockerprom/containers/json.json b/discovery/moby/testdata/dockerprom/containers/json.json index ebfc56b6d5..33406bf9a4 100644 --- a/discovery/moby/testdata/dockerprom/containers/json.json +++ b/discovery/moby/testdata/dockerprom/containers/json.json @@ -228,5 +228,74 @@ "Networks": {} }, "Mounts": [] + }, + { + "Id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f", + "Names": [ + "/dockersd_multi_networks" + ], + "Image": "mysql:5.7.29", + "ImageID": "sha256:16ae2f4625ba63a250462bedeece422e741de9f0caf3b1d89fd5b257aca80cd1", + "Command": "mysqld", + "Created": 1616273136, + "Ports": [ + { + "PrivatePort": 3306, + "Type": "tcp" + }, + { + "PrivatePort": 33060, + "Type": "tcp" + } + ], + "Labels": { + "com.docker.compose.project": "dockersd", + "com.docker.compose.service": "mysql", + "com.docker.compose.version": "2.2.2" + }, + "State": "running", + "Status": "Up 40 seconds", + "HostConfig": { + "NetworkMode": "dockersd_private_none" + }, + "NetworkSettings": { + "Networks": { + "dockersd_private": { + "IPAMConfig": null, + "Links": null, + "Aliases": null, + "NetworkID": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601", + "EndpointID": "972d6807997369605ace863af58de6cb90c787a5bf2ffc4105662d393ae539b7", + "Gateway": "172.20.0.1", + "IPAddress": "172.20.0.3", + "IPPrefixLen": 16, + "IPv6Gateway": "", + "GlobalIPv6Address": "", + "GlobalIPv6PrefixLen": 0, + "MacAddress": "02:42:ac:14:00:02", + "DriverOpts": null + }, + "dockersd_private1": { + "IPAMConfig": {}, + "Links": null, + "Aliases": [ + "mysql", + "mysql", + "f9ade4b83199" + ], + "NetworkID": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8", + "EndpointID": "91a98405344ee1cb7d977cafabe634837876651544b32da20a5e0155868e6f5f", + "Gateway": "172.21.0.1", + "IPAddress": "172.21.0.3", + "IPPrefixLen": 24, + "IPv6Gateway": "", + "GlobalIPv6Address": "", + "GlobalIPv6PrefixLen": 0, + "MacAddress": "02:42:ac:15:00:02", + "DriverOpts": null + } + } + }, + "Mounts": [] } ] diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index ff24082e40..6ce3d9c48a 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -943,7 +943,9 @@ tls_config: # The host to use if the container is in host networking mode. [ host_networking_host: | default = "localhost" ] -# Match the first network if the container has multiple networks defined, thus avoiding collecting duplicate targets. +# Sort all non-nil networks in ascending order based on network name and +# get the first network if the container has multiple networks defined, +# thus avoiding collecting duplicate targets. [ match_first_network: | default = true ] # Optional filters to limit the discovery process to a subset of available From 9b5e7623f457db15715f5eba99983ec9c4beab86 Mon Sep 17 00:00:00 2001 From: Julien Pivotto Date: Tue, 13 Aug 2024 14:34:26 +0200 Subject: [PATCH 21/83] Add support for multiple listening addresses Fixes #9105 Signed-off-by: Julien Pivotto Signed-off-by: Julien --- cmd/prometheus/main.go | 14 +- docs/command-line/prometheus.md | 6 +- docs/command-line/promtool.md | 14 +- go.mod | 2 +- util/documentcli/documentcli.go | 11 ++ util/netconnlimit/netconnlimit.go | 97 ++++++++++++ util/netconnlimit/netconnlimit_test.go | 124 +++++++++++++++ web/web.go | 38 +++-- web/web_test.go | 208 +++++++++++++++++-------- 9 files changed, 420 insertions(+), 94 deletions(-) create mode 100644 util/netconnlimit/netconnlimit.go create mode 100644 util/netconnlimit/netconnlimit_test.go diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 94924d2c4e..c402fbcb80 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -297,8 +297,8 @@ func main() { a.Flag("config.file", "Prometheus configuration file path."). Default("prometheus.yml").StringVar(&cfg.configFile) - a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry."). - Default("0.0.0.0:9090").StringVar(&cfg.web.ListenAddress) + a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry. Can be repeated."). + Default("0.0.0.0:9090").StringsVar(&cfg.web.ListenAddresses) a.Flag("auto-gomemlimit.ratio", "The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory"). Default("0.9").FloatVar(&cfg.memlimitRatio) @@ -312,7 +312,7 @@ func main() { "Maximum duration before timing out read of the request, and closing idle connections."). Default("5m").SetValue(&cfg.webTimeout) - a.Flag("web.max-connections", "Maximum number of simultaneous connections."). + a.Flag("web.max-connections", "Maximum number of simultaneous connections across all listeners."). Default("512").IntVar(&cfg.web.MaxConnections) a.Flag("web.external-url", @@ -544,7 +544,7 @@ func main() { localStoragePath = cfg.agentStoragePath } - cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddress) + cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddresses[0]) if err != nil { fmt.Fprintln(os.Stderr, fmt.Errorf("parse external URL %q: %w", cfg.prometheusURL, err)) os.Exit(2) @@ -990,9 +990,9 @@ func main() { }) } - listener, err := webHandler.Listener() + listeners, err := webHandler.Listeners() if err != nil { - level.Error(logger).Log("msg", "Unable to start web listener", "err", err) + level.Error(logger).Log("msg", "Unable to start web listeners", "err", err) os.Exit(1) } @@ -1287,7 +1287,7 @@ func main() { // Web handler. g.Add( func() error { - if err := webHandler.Run(ctxWeb, listener, *webConfig); err != nil { + if err := webHandler.Run(ctxWeb, listeners, *webConfig); err != nil { return fmt.Errorf("error starting web server: %w", err) } return nil diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index a16e807e1c..f65c260c40 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -15,11 +15,11 @@ The Prometheus monitoring server | -h, --help | Show context-sensitive help (also try --help-long and --help-man). | | | --version | Show application version. | | | --config.file | Prometheus configuration file path. | `prometheus.yml` | -| --web.listen-address | Address to listen on for UI, API, and telemetry. | `0.0.0.0:9090` | +| --web.listen-address ... | Address to listen on for UI, API, and telemetry. Can be repeated. | `0.0.0.0:9090` | | --auto-gomemlimit.ratio | The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory | `0.9` | | --web.config.file | [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. | | | --web.read-timeout | Maximum duration before timing out read of the request, and closing idle connections. | `5m` | -| --web.max-connections | Maximum number of simultaneous connections. | `512` | +| --web.max-connections | Maximum number of simultaneous connections across all listeners. | `512` | | --web.external-url | The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically. | | | --web.route-prefix | Prefix for the internal routes of web endpoints. Defaults to path of --web.external-url. | | | --web.user-assets | Path to static asset directory, available at /user. | | @@ -57,7 +57,7 @@ The Prometheus monitoring server | --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` | | --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` | | --scrape.name-escaping-scheme | Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots". | `values` | -| --enable-feature | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +| --enable-feature ... | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | | --log.level | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` | | --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` | diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index 6bb80169a9..0c397387ff 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -15,7 +15,7 @@ Tooling for the Prometheus monitoring system. | -h, --help | Show context-sensitive help (also try --help-long and --help-man). | | --version | Show application version. | | --experimental | Enable experimental commands. | -| --enable-feature | Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details. | +| --enable-feature ... | Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details. | @@ -281,7 +281,7 @@ Run series query. | Flag | Description | | --- | --- | -| --match | Series selector. Can be specified multiple times. | +| --match ... | Series selector. Can be specified multiple times. | | --start | Start time (RFC3339 or Unix timestamp). | | --end | End time (RFC3339 or Unix timestamp). | @@ -309,7 +309,7 @@ Run labels query. | --- | --- | | --start | Start time (RFC3339 or Unix timestamp). | | --end | End time (RFC3339 or Unix timestamp). | -| --match | Series selector. Can be specified multiple times. | +| --match ... | Series selector. Can be specified multiple times. | @@ -338,7 +338,7 @@ Run queries against your Prometheus to analyze the usage pattern of certain metr | --type | Type of metric: histogram. | | | --duration | Time frame to analyze. | `1h` | | --time | Query time (RFC3339 or Unix timestamp), defaults to now. | | -| --match | Series selector. Can be specified multiple times. | | +| --match ... | Series selector. Can be specified multiple times. | | @@ -461,7 +461,7 @@ Unit tests for rules. | Flag | Description | Default | | --- | --- | --- | -| --run | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | | +| --run ... | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | | | --diff | [Experimental] Print colored differential output between expected & received output. | `false` | @@ -578,7 +578,7 @@ Dump samples from a TSDB. | --sandbox-dir-root | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` | | --min-time | Minimum timestamp to dump. | `-9223372036854775808` | | --max-time | Maximum timestamp to dump. | `9223372036854775807` | -| --match | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` | +| --match ... | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` | @@ -605,7 +605,7 @@ Dump samples from a TSDB. | --sandbox-dir-root | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` | | --min-time | Minimum timestamp to dump. | `-9223372036854775808` | | --max-time | Maximum timestamp to dump. | `9223372036854775807` | -| --match | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` | +| --match ... | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` | diff --git a/go.mod b/go.mod index 1c92e52bd2..50d560bc3a 100644 --- a/go.mod +++ b/go.mod @@ -75,7 +75,6 @@ require ( go.uber.org/automaxprocs v1.5.3 go.uber.org/goleak v1.3.0 go.uber.org/multierr v1.11.0 - golang.org/x/net v0.27.0 golang.org/x/oauth2 v0.21.0 golang.org/x/sync v0.7.0 golang.org/x/sys v0.22.0 @@ -190,6 +189,7 @@ require ( golang.org/x/crypto v0.25.0 // indirect golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect golang.org/x/mod v0.19.0 // indirect + golang.org/x/net v0.27.0 // indirect golang.org/x/term v0.22.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240730163845-b1a4ccb954bf // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/util/documentcli/documentcli.go b/util/documentcli/documentcli.go index 9de2bb8d4c..6964952af4 100644 --- a/util/documentcli/documentcli.go +++ b/util/documentcli/documentcli.go @@ -23,6 +23,7 @@ import ( "bytes" "fmt" "io" + "reflect" "strings" "github.com/alecthomas/kingpin/v2" @@ -75,6 +76,16 @@ func createFlagRow(flag *kingpin.FlagModel) []string { name = fmt.Sprintf(`-%c, --%s`, flag.Short, flag.Name) } + valueType := reflect.TypeOf(flag.Value) + if valueType.Kind() == reflect.Ptr { + valueType = valueType.Elem() + } + if valueType.Kind() == reflect.Struct { + if _, found := valueType.FieldByName("slice"); found { + name = fmt.Sprintf(`%s ...`, name) + } + } + return []string{name, strings.ReplaceAll(flag.Help, "|", `\|`), defaultVal} } diff --git a/util/netconnlimit/netconnlimit.go b/util/netconnlimit/netconnlimit.go new file mode 100644 index 0000000000..3bdd805b83 --- /dev/null +++ b/util/netconnlimit/netconnlimit.go @@ -0,0 +1,97 @@ +// Copyright 2024 The Prometheus Authors +// Based on golang.org/x/net/netutil: +// Copyright 2013 The Go Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package netconnlimit provides network utility functions for limiting +// simultaneous connections across multiple listeners. +package netconnlimit + +import ( + "net" + "sync" +) + +// NewSharedSemaphore creates and returns a new semaphore channel that can be used +// to limit the number of simultaneous connections across multiple listeners. +func NewSharedSemaphore(n int) chan struct{} { + return make(chan struct{}, n) +} + +// SharedLimitListener returns a listener that accepts at most n simultaneous +// connections across multiple listeners using the provided shared semaphore. +func SharedLimitListener(l net.Listener, sem chan struct{}) net.Listener { + return &sharedLimitListener{ + Listener: l, + sem: sem, + done: make(chan struct{}), + } +} + +type sharedLimitListener struct { + net.Listener + sem chan struct{} + closeOnce sync.Once // Ensures the done chan is only closed once. + done chan struct{} // No values sent; closed when Close is called. +} + +// Acquire acquires the shared semaphore. Returns true if successfully +// acquired, false if the listener is closed and the semaphore is not +// acquired. +func (l *sharedLimitListener) acquire() bool { + select { + case <-l.done: + return false + case l.sem <- struct{}{}: + return true + } +} + +func (l *sharedLimitListener) release() { <-l.sem } + +func (l *sharedLimitListener) Accept() (net.Conn, error) { + if !l.acquire() { + for { + c, err := l.Listener.Accept() + if err != nil { + return nil, err + } + c.Close() + } + } + + c, err := l.Listener.Accept() + if err != nil { + l.release() + return nil, err + } + return &sharedLimitListenerConn{Conn: c, release: l.release}, nil +} + +func (l *sharedLimitListener) Close() error { + err := l.Listener.Close() + l.closeOnce.Do(func() { close(l.done) }) + return err +} + +type sharedLimitListenerConn struct { + net.Conn + releaseOnce sync.Once + release func() +} + +func (l *sharedLimitListenerConn) Close() error { + err := l.Conn.Close() + l.releaseOnce.Do(l.release) + return err +} diff --git a/util/netconnlimit/netconnlimit_test.go b/util/netconnlimit/netconnlimit_test.go new file mode 100644 index 0000000000..e4d4904209 --- /dev/null +++ b/util/netconnlimit/netconnlimit_test.go @@ -0,0 +1,124 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package netconnlimit + +import ( + "io" + "net" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestSharedLimitListenerConcurrency(t *testing.T) { + testCases := []struct { + name string + semCapacity int + connCount int + expected int // Expected number of connections processed simultaneously. + }{ + { + name: "Single connection allowed", + semCapacity: 1, + connCount: 3, + expected: 1, + }, + { + name: "Two connections allowed", + semCapacity: 2, + connCount: 3, + expected: 2, + }, + { + name: "Three connections allowed", + semCapacity: 3, + connCount: 3, + expected: 3, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + sem := NewSharedSemaphore(tc.semCapacity) + listener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err, "failed to create listener") + defer listener.Close() + + limitedListener := SharedLimitListener(listener, sem) + + var wg sync.WaitGroup + var activeConnCount int64 + var mu sync.Mutex + + wg.Add(tc.connCount) + + // Accept connections. + for i := 0; i < tc.connCount; i++ { + go func() { + defer wg.Done() + + conn, err := limitedListener.Accept() + require.NoError(t, err, "failed to accept connection") + defer conn.Close() + + // Simulate work and track the active connection count. + mu.Lock() + activeConnCount++ + require.LessOrEqual(t, activeConnCount, int64(tc.expected), "too many simultaneous connections") + mu.Unlock() + + time.Sleep(100 * time.Millisecond) + + mu.Lock() + activeConnCount-- + mu.Unlock() + }() + } + + // Create clients that attempt to connect to the listener. + for i := 0; i < tc.connCount; i++ { + go func() { + conn, err := net.Dial("tcp", listener.Addr().String()) + require.NoError(t, err, "failed to connect to listener") + defer conn.Close() + _, _ = io.WriteString(conn, "hello") + }() + } + + wg.Wait() + + // Ensure all connections are released and semaphore is empty. + require.Empty(t, sem) + }) + } +} + +func TestSharedLimitListenerClose(t *testing.T) { + sem := NewSharedSemaphore(2) + listener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err, "failed to create listener") + + limitedListener := SharedLimitListener(listener, sem) + + // Close the listener and ensure it does not accept new connections. + err = limitedListener.Close() + require.NoError(t, err, "failed to close listener") + + conn, err := limitedListener.Accept() + require.Error(t, err, "expected error on accept after listener closed") + if conn != nil { + conn.Close() + } +} diff --git a/web/web.go b/web/web.go index 8e84acd039..098baa055e 100644 --- a/web/web.go +++ b/web/web.go @@ -49,7 +49,6 @@ import ( toolkit_web "github.com/prometheus/exporter-toolkit/web" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "go.uber.org/atomic" - "golang.org/x/net/netutil" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/notifier" @@ -59,6 +58,7 @@ import ( "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/template" "github.com/prometheus/prometheus/util/httputil" + "github.com/prometheus/prometheus/util/netconnlimit" api_v1 "github.com/prometheus/prometheus/web/api/v1" "github.com/prometheus/prometheus/web/ui" ) @@ -244,7 +244,7 @@ type Options struct { Version *PrometheusVersion Flags map[string]string - ListenAddress string + ListenAddresses []string CORSOrigin *regexp.Regexp ReadTimeout time.Duration MaxConnections int @@ -334,7 +334,7 @@ func New(logger log.Logger, o *Options) *Handler { }, o.Flags, api_v1.GlobalURLOptions{ - ListenAddress: o.ListenAddress, + ListenAddress: o.ListenAddresses[0], Host: o.ExternalURL.Host, Scheme: o.ExternalURL.Scheme, }, @@ -566,15 +566,29 @@ func (h *Handler) Reload() <-chan chan error { return h.reloadCh } -// Listener creates the TCP listener for web requests. -func (h *Handler) Listener() (net.Listener, error) { - level.Info(h.logger).Log("msg", "Start listening for connections", "address", h.options.ListenAddress) +// Listeners creates the TCP listeners for web requests. +func (h *Handler) Listeners() ([]net.Listener, error) { + var listeners []net.Listener + sem := netconnlimit.NewSharedSemaphore(h.options.MaxConnections) + for _, address := range h.options.ListenAddresses { + listener, err := h.Listener(address, sem) + if err != nil { + return listeners, err + } + listeners = append(listeners, listener) + } + return listeners, nil +} - listener, err := net.Listen("tcp", h.options.ListenAddress) +// Listener creates the TCP listener for web requests. +func (h *Handler) Listener(address string, sem chan struct{}) (net.Listener, error) { + level.Info(h.logger).Log("msg", "Start listening for connections", "address", address) + + listener, err := net.Listen("tcp", address) if err != nil { return listener, err } - listener = netutil.LimitListener(listener, h.options.MaxConnections) + listener = netconnlimit.SharedLimitListener(listener, sem) // Monitor incoming connections with conntrack. listener = conntrack.NewListener(listener, @@ -585,10 +599,10 @@ func (h *Handler) Listener() (net.Listener, error) { } // Run serves the HTTP endpoints. -func (h *Handler) Run(ctx context.Context, listener net.Listener, webConfig string) error { - if listener == nil { +func (h *Handler) Run(ctx context.Context, listeners []net.Listener, webConfig string) error { + if len(listeners) == 0 { var err error - listener, err = h.Listener() + listeners, err = h.Listeners() if err != nil { return err } @@ -623,7 +637,7 @@ func (h *Handler) Run(ctx context.Context, listener net.Listener, webConfig stri errCh := make(chan error, 1) go func() { - errCh <- toolkit_web.Serve(listener, httpSrv, &toolkit_web.FlagConfig{WebConfigFile: &webConfig}, h.logger) + errCh <- toolkit_web.ServeMultiple(listeners, httpSrv, &toolkit_web.FlagConfig{WebConfigFile: &webConfig}, h.logger) }() select { diff --git a/web/web_test.go b/web/web_test.go index e1fa66fa8b..b660746b13 100644 --- a/web/web_test.go +++ b/web/web_test.go @@ -73,19 +73,19 @@ func TestReadyAndHealthy(t *testing.T) { port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) opts := &Options{ - ListenAddress: port, - ReadTimeout: 30 * time.Second, - MaxConnections: 512, - Context: nil, - Storage: nil, - LocalStorage: &dbAdapter{db}, - TSDBDir: dbDir, - QueryEngine: nil, - ScrapeManager: &scrape.Manager{}, - RuleManager: &rules.Manager{}, - Notifier: nil, - RoutePrefix: "/", - EnableAdminAPI: true, + ListenAddresses: []string{port}, + ReadTimeout: 30 * time.Second, + MaxConnections: 512, + Context: nil, + Storage: nil, + LocalStorage: &dbAdapter{db}, + TSDBDir: dbDir, + QueryEngine: nil, + ScrapeManager: &scrape.Manager{}, + RuleManager: &rules.Manager{}, + Notifier: nil, + RoutePrefix: "/", + EnableAdminAPI: true, ExternalURL: &url.URL{ Scheme: "http", Host: "localhost" + port, @@ -101,9 +101,9 @@ func TestReadyAndHealthy(t *testing.T) { webHandler.config = &config.Config{} webHandler.notifier = ¬ifier.Manager{} - l, err := webHandler.Listener() + l, err := webHandler.Listeners() if err != nil { - panic(fmt.Sprintf("Unable to start web listener: %s", err)) + panic(fmt.Sprintf("Unable to start web listeners: %s", err)) } ctx, cancel := context.WithCancel(context.Background()) @@ -198,19 +198,19 @@ func TestRoutePrefix(t *testing.T) { port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) opts := &Options{ - ListenAddress: port, - ReadTimeout: 30 * time.Second, - MaxConnections: 512, - Context: nil, - TSDBDir: dbDir, - LocalStorage: &dbAdapter{db}, - Storage: nil, - QueryEngine: nil, - ScrapeManager: nil, - RuleManager: nil, - Notifier: nil, - RoutePrefix: "/prometheus", - EnableAdminAPI: true, + ListenAddresses: []string{port}, + ReadTimeout: 30 * time.Second, + MaxConnections: 512, + Context: nil, + TSDBDir: dbDir, + LocalStorage: &dbAdapter{db}, + Storage: nil, + QueryEngine: nil, + ScrapeManager: nil, + RuleManager: nil, + Notifier: nil, + RoutePrefix: "/prometheus", + EnableAdminAPI: true, ExternalURL: &url.URL{ Host: "localhost.localdomain" + port, Scheme: "http", @@ -220,9 +220,9 @@ func TestRoutePrefix(t *testing.T) { opts.Flags = map[string]string{} webHandler := New(nil, opts) - l, err := webHandler.Listener() + l, err := webHandler.Listeners() if err != nil { - panic(fmt.Sprintf("Unable to start web listener: %s", err)) + panic(fmt.Sprintf("Unable to start web listeners: %s", err)) } ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -299,8 +299,8 @@ func TestDebugHandler(t *testing.T) { {"/foo", "/bar/debug/pprof/goroutine", 404}, } { opts := &Options{ - RoutePrefix: tc.prefix, - ListenAddress: "somehost:9090", + RoutePrefix: tc.prefix, + ListenAddresses: []string{"somehost:9090"}, ExternalURL: &url.URL{ Host: "localhost.localdomain:9090", Scheme: "http", @@ -324,8 +324,8 @@ func TestDebugHandler(t *testing.T) { func TestHTTPMetrics(t *testing.T) { t.Parallel() handler := New(nil, &Options{ - RoutePrefix: "/", - ListenAddress: "somehost:9090", + RoutePrefix: "/", + ListenAddresses: []string{"somehost:9090"}, ExternalURL: &url.URL{ Host: "localhost.localdomain:9090", Scheme: "http", @@ -381,18 +381,18 @@ func TestShutdownWithStaleConnection(t *testing.T) { port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) opts := &Options{ - ListenAddress: port, - ReadTimeout: timeout, - MaxConnections: 512, - Context: nil, - Storage: nil, - LocalStorage: &dbAdapter{db}, - TSDBDir: dbDir, - QueryEngine: nil, - ScrapeManager: &scrape.Manager{}, - RuleManager: &rules.Manager{}, - Notifier: nil, - RoutePrefix: "/", + ListenAddresses: []string{port}, + ReadTimeout: timeout, + MaxConnections: 512, + Context: nil, + Storage: nil, + LocalStorage: &dbAdapter{db}, + TSDBDir: dbDir, + QueryEngine: nil, + ScrapeManager: &scrape.Manager{}, + RuleManager: &rules.Manager{}, + Notifier: nil, + RoutePrefix: "/", ExternalURL: &url.URL{ Scheme: "http", Host: "localhost" + port, @@ -408,9 +408,9 @@ func TestShutdownWithStaleConnection(t *testing.T) { webHandler.config = &config.Config{} webHandler.notifier = ¬ifier.Manager{} - l, err := webHandler.Listener() + l, err := webHandler.Listeners() if err != nil { - panic(fmt.Sprintf("Unable to start web listener: %s", err)) + panic(fmt.Sprintf("Unable to start web listeners: %s", err)) } closed := make(chan struct{}) @@ -448,7 +448,7 @@ func TestHandleMultipleQuitRequests(t *testing.T) { port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) opts := &Options{ - ListenAddress: port, + ListenAddresses: []string{port}, MaxConnections: 512, EnableLifecycle: true, RoutePrefix: "/", @@ -461,9 +461,9 @@ func TestHandleMultipleQuitRequests(t *testing.T) { webHandler := New(nil, opts) webHandler.config = &config.Config{} webHandler.notifier = ¬ifier.Manager{} - l, err := webHandler.Listener() + l, err := webHandler.Listeners() if err != nil { - panic(fmt.Sprintf("Unable to start web listener: %s", err)) + panic(fmt.Sprintf("Unable to start web listeners: %s", err)) } ctx, cancel := context.WithCancel(context.Background()) closed := make(chan struct{}) @@ -513,17 +513,17 @@ func TestAgentAPIEndPoints(t *testing.T) { port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) opts := &Options{ - ListenAddress: port, - ReadTimeout: 30 * time.Second, - MaxConnections: 512, - Context: nil, - Storage: nil, - QueryEngine: nil, - ScrapeManager: &scrape.Manager{}, - RuleManager: &rules.Manager{}, - Notifier: nil, - RoutePrefix: "/", - EnableAdminAPI: true, + ListenAddresses: []string{port}, + ReadTimeout: 30 * time.Second, + MaxConnections: 512, + Context: nil, + Storage: nil, + QueryEngine: nil, + ScrapeManager: &scrape.Manager{}, + RuleManager: &rules.Manager{}, + Notifier: nil, + RoutePrefix: "/", + EnableAdminAPI: true, ExternalURL: &url.URL{ Scheme: "http", Host: "localhost" + port, @@ -540,9 +540,9 @@ func TestAgentAPIEndPoints(t *testing.T) { webHandler.SetReady(true) webHandler.config = &config.Config{} webHandler.notifier = ¬ifier.Manager{} - l, err := webHandler.Listener() + l, err := webHandler.Listeners() if err != nil { - panic(fmt.Sprintf("Unable to start web listener: %s", err)) + panic(fmt.Sprintf("Unable to start web listeners: %s", err)) } ctx, cancel := context.WithCancel(context.Background()) @@ -628,3 +628,83 @@ func cleanupSnapshot(t *testing.T, dbDir string, resp *http.Response) { require.NoError(t, os.Remove(filepath.Join(dbDir, "snapshots", snapshot.Data.Name))) require.NoError(t, os.Remove(filepath.Join(dbDir, "snapshots"))) } + +func TestMultipleListenAddresses(t *testing.T) { + t.Parallel() + + dbDir := t.TempDir() + + db, err := tsdb.Open(dbDir, nil, nil, nil, nil) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + // Create multiple ports for testing multiple ListenAddresses + port1 := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) + port2 := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) + + opts := &Options{ + ListenAddresses: []string{port1, port2}, + ReadTimeout: 30 * time.Second, + MaxConnections: 512, + Context: nil, + Storage: nil, + LocalStorage: &dbAdapter{db}, + TSDBDir: dbDir, + QueryEngine: nil, + ScrapeManager: &scrape.Manager{}, + RuleManager: &rules.Manager{}, + Notifier: nil, + RoutePrefix: "/", + EnableAdminAPI: true, + ExternalURL: &url.URL{ + Scheme: "http", + Host: "localhost" + port1, + Path: "/", + }, + Version: &PrometheusVersion{}, + Gatherer: prometheus.DefaultGatherer, + } + + opts.Flags = map[string]string{} + + webHandler := New(nil, opts) + + webHandler.config = &config.Config{} + webHandler.notifier = ¬ifier.Manager{} + l, err := webHandler.Listeners() + if err != nil { + panic(fmt.Sprintf("Unable to start web listener: %s", err)) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go func() { + err := webHandler.Run(ctx, l, "") + if err != nil { + panic(fmt.Sprintf("Can't start web handler:%s", err)) + } + }() + + // Give some time for the web goroutine to run since we need the server + // to be up before starting tests. + time.Sleep(5 * time.Second) + + // Set to ready. + webHandler.SetReady(true) + + for _, port := range []string{port1, port2} { + baseURL := "http://localhost" + port + + resp, err := http.Get(baseURL + "/-/healthy") + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + cleanupTestResponse(t, resp) + + resp, err = http.Get(baseURL + "/-/ready") + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + cleanupTestResponse(t, resp) + } +} From 88cac6fb49958eb83faf8eb166eeec62fbee49a0 Mon Sep 17 00:00:00 2001 From: machine424 Date: Wed, 14 Aug 2024 19:06:49 +0200 Subject: [PATCH 22/83] docs(configuration.md): clarify the explanations about some temp labels and format for better visibility. Signed-off-by: machine424 --- docs/configuration/configuration.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 313a7f2f37..1c43f750b1 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -3265,12 +3265,16 @@ Initially, aside from the configured per-target labels, a target's `job` label is set to the `job_name` value of the respective scrape configuration. The `__address__` label is set to the `:` address of the target. After relabeling, the `instance` label is set to the value of `__address__` by default if -it was not set during relabeling. The `__scheme__` and `__metrics_path__` labels -are set to the scheme and metrics path of the target respectively. The `__param_` -label is set to the value of the first passed URL parameter called ``. +it was not set during relabeling. + +The `__scheme__` and `__metrics_path__` labels +are set to the scheme and metrics path of the target respectively, as specified in `scrape_config`. + +The `__param_` +label is set to the value of the first passed URL parameter called ``, as defined in `scrape_config`. The `__scrape_interval__` and `__scrape_timeout__` labels are set to the target's -interval and timeout. +interval and timeout, as specified in `scrape_config`. Additional labels prefixed with `__meta_` may be available during the relabeling phase. They are set by the service discovery mechanism that provided From 1435c8ae4aa1041592778018ba62fc3058a9ad3d Mon Sep 17 00:00:00 2001 From: Ben Kochie Date: Tue, 20 Aug 2024 19:41:02 +0200 Subject: [PATCH 23/83] Include test CA text info (#14699) Use `openssl x509 -text -in ` to include the text version of the certificate in order to make it easier to see the diff when applying changes to the cert. Signed-off-by: SuperQ --- scrape/testdata/ca.cer | 58 +++++++++++++++++++++++++++++++++++++++++ tracing/testdata/ca.cer | 58 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) diff --git a/scrape/testdata/ca.cer b/scrape/testdata/ca.cer index df93443923..dbbd009d4a 100644 --- a/scrape/testdata/ca.cer +++ b/scrape/testdata/ca.cer @@ -1,3 +1,61 @@ +Certificate: + Data: + Version: 3 (0x2) + Serial Number: + 93:6c:9e:29:8d:37:7b:66 + Signature Algorithm: sha256WithRSAEncryption + Issuer: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA + Validity + Not Before: Aug 20 11:51:23 2024 GMT + Not After : Dec 5 11:51:23 2044 GMT + Subject: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA + Subject Public Key Info: + Public Key Algorithm: rsaEncryption + Public-Key: (2048 bit) + Modulus: + 00:e9:52:05:4d:f2:5a:95:04:2d:b8:73:8b:3c:e7: + 47:48:67:00:be:dd:6c:41:f3:7c:4b:44:73:a3:77: + 3e:84:af:30:d7:2a:ad:45:6a:b7:89:23:05:15:b3: + aa:46:79:b8:95:64:cc:13:c4:44:a1:01:a0:e2:3d: + a5:67:2b:aa:d3:13:06:43:33:1c:96:36:12:9e:c6: + 1d:36:9b:d7:47:bd:28:2d:88:15:04:fa:14:a3:ff: + 8c:26:22:c5:a2:15:c7:76:b3:11:f6:a3:44:9a:28: + 0f:ca:fb:f4:51:a8:6a:05:94:7c:77:47:c8:21:56: + 25:bf:e2:2e:df:33:f3:e4:bd:d6:47:a5:49:13:12: + c8:1f:4c:d7:2a:56:a2:6c:c1:cf:55:05:5d:9a:75: + a2:23:4e:e6:8e:ff:76:05:d6:e0:c8:0b:51:f0:b6: + a1:b2:7d:8f:9c:6a:a5:ce:86:92:15:8c:5b:86:45: + c0:4a:ff:54:b8:ee:cf:11:bd:07:cb:4b:7d:0b:a1: + 9d:72:86:9f:55:bc:f9:6c:d9:55:60:96:30:3f:ec: + 2d:f6:5f:9a:32:9a:5a:5c:1c:5f:32:f9:d6:0f:04: + f8:81:08:04:9a:95:c3:9d:5a:30:8e:a5:0e:47:2f: + 00:ce:e0:2e:ad:5a:b8:b6:4c:55:7c:8a:59:22:b0: + ed:73 + Exponent: 65537 (0x10001) + X509v3 extensions: + X509v3 Subject Key Identifier: + CC:F5:05:99:E5:AB:12:69:D8:78:89:4A:31:CA:F0:8B:0B:AD:66:1B + X509v3 Authority Key Identifier: + CC:F5:05:99:E5:AB:12:69:D8:78:89:4A:31:CA:F0:8B:0B:AD:66:1B + X509v3 Basic Constraints: + CA:TRUE + Signature Algorithm: sha256WithRSAEncryption + Signature Value: + 4a:a1:b0:bc:c8:87:4f:7c:96:62:e5:09:29:ae:3a:2e:68:d0: + d2:c5:68:ed:ea:83:36:b1:86:f3:b9:e9:19:2b:b6:73:10:6f: + df:7f:bb:f1:76:81:03:c1:a1:5a:ee:6c:44:b8:7c:10:d1:5a: + d7:c1:92:64:59:35:a6:e0:aa:08:41:37:6e:e7:c8:b6:bd:0c: + 4b:47:78:ec:c4:b4:15:a3:62:76:4a:39:8e:6e:19:ff:f0:c0: + 8a:7e:1c:cd:87:e5:00:6c:f1:ce:27:26:ff:b8:e9:eb:f7:2f: + bd:c2:4b:9c:d6:57:de:74:74:b3:4f:03:98:9a:b5:08:2d:16: + ca:7f:b6:c8:76:62:86:1b:7c:f2:3e:6c:78:cc:2c:95:9a:bb: + 77:25:e8:80:ff:9b:e8:f8:9a:85:3b:85:b7:17:4e:77:a1:cf: + 4d:b9:d0:25:e8:5d:8c:e6:7c:f1:d9:52:30:3d:ec:2b:37:91: + bc:e2:e8:39:31:6f:3d:e9:98:70:80:7c:41:dd:19:13:05:21: + 94:7b:16:cf:d8:ee:4e:38:34:5e:6a:ff:cd:85:ac:8f:94:9a: + dd:4e:77:05:13:a6:b4:80:52:b2:97:64:76:88:f4:dd:42:0a: + 50:1c:80:fd:4b:6e:a9:62:10:aa:ef:2e:c1:2f:be:0e:c2:2e: + b5:28:5f:83 -----BEGIN CERTIFICATE----- MIIDkTCCAnmgAwIBAgIJAJNsnimNN3tmMA0GCSqGSIb3DQEBCwUAMF8xCzAJBgNV BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg diff --git a/tracing/testdata/ca.cer b/tracing/testdata/ca.cer index df93443923..dbbd009d4a 100644 --- a/tracing/testdata/ca.cer +++ b/tracing/testdata/ca.cer @@ -1,3 +1,61 @@ +Certificate: + Data: + Version: 3 (0x2) + Serial Number: + 93:6c:9e:29:8d:37:7b:66 + Signature Algorithm: sha256WithRSAEncryption + Issuer: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA + Validity + Not Before: Aug 20 11:51:23 2024 GMT + Not After : Dec 5 11:51:23 2044 GMT + Subject: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA + Subject Public Key Info: + Public Key Algorithm: rsaEncryption + Public-Key: (2048 bit) + Modulus: + 00:e9:52:05:4d:f2:5a:95:04:2d:b8:73:8b:3c:e7: + 47:48:67:00:be:dd:6c:41:f3:7c:4b:44:73:a3:77: + 3e:84:af:30:d7:2a:ad:45:6a:b7:89:23:05:15:b3: + aa:46:79:b8:95:64:cc:13:c4:44:a1:01:a0:e2:3d: + a5:67:2b:aa:d3:13:06:43:33:1c:96:36:12:9e:c6: + 1d:36:9b:d7:47:bd:28:2d:88:15:04:fa:14:a3:ff: + 8c:26:22:c5:a2:15:c7:76:b3:11:f6:a3:44:9a:28: + 0f:ca:fb:f4:51:a8:6a:05:94:7c:77:47:c8:21:56: + 25:bf:e2:2e:df:33:f3:e4:bd:d6:47:a5:49:13:12: + c8:1f:4c:d7:2a:56:a2:6c:c1:cf:55:05:5d:9a:75: + a2:23:4e:e6:8e:ff:76:05:d6:e0:c8:0b:51:f0:b6: + a1:b2:7d:8f:9c:6a:a5:ce:86:92:15:8c:5b:86:45: + c0:4a:ff:54:b8:ee:cf:11:bd:07:cb:4b:7d:0b:a1: + 9d:72:86:9f:55:bc:f9:6c:d9:55:60:96:30:3f:ec: + 2d:f6:5f:9a:32:9a:5a:5c:1c:5f:32:f9:d6:0f:04: + f8:81:08:04:9a:95:c3:9d:5a:30:8e:a5:0e:47:2f: + 00:ce:e0:2e:ad:5a:b8:b6:4c:55:7c:8a:59:22:b0: + ed:73 + Exponent: 65537 (0x10001) + X509v3 extensions: + X509v3 Subject Key Identifier: + CC:F5:05:99:E5:AB:12:69:D8:78:89:4A:31:CA:F0:8B:0B:AD:66:1B + X509v3 Authority Key Identifier: + CC:F5:05:99:E5:AB:12:69:D8:78:89:4A:31:CA:F0:8B:0B:AD:66:1B + X509v3 Basic Constraints: + CA:TRUE + Signature Algorithm: sha256WithRSAEncryption + Signature Value: + 4a:a1:b0:bc:c8:87:4f:7c:96:62:e5:09:29:ae:3a:2e:68:d0: + d2:c5:68:ed:ea:83:36:b1:86:f3:b9:e9:19:2b:b6:73:10:6f: + df:7f:bb:f1:76:81:03:c1:a1:5a:ee:6c:44:b8:7c:10:d1:5a: + d7:c1:92:64:59:35:a6:e0:aa:08:41:37:6e:e7:c8:b6:bd:0c: + 4b:47:78:ec:c4:b4:15:a3:62:76:4a:39:8e:6e:19:ff:f0:c0: + 8a:7e:1c:cd:87:e5:00:6c:f1:ce:27:26:ff:b8:e9:eb:f7:2f: + bd:c2:4b:9c:d6:57:de:74:74:b3:4f:03:98:9a:b5:08:2d:16: + ca:7f:b6:c8:76:62:86:1b:7c:f2:3e:6c:78:cc:2c:95:9a:bb: + 77:25:e8:80:ff:9b:e8:f8:9a:85:3b:85:b7:17:4e:77:a1:cf: + 4d:b9:d0:25:e8:5d:8c:e6:7c:f1:d9:52:30:3d:ec:2b:37:91: + bc:e2:e8:39:31:6f:3d:e9:98:70:80:7c:41:dd:19:13:05:21: + 94:7b:16:cf:d8:ee:4e:38:34:5e:6a:ff:cd:85:ac:8f:94:9a: + dd:4e:77:05:13:a6:b4:80:52:b2:97:64:76:88:f4:dd:42:0a: + 50:1c:80:fd:4b:6e:a9:62:10:aa:ef:2e:c1:2f:be:0e:c2:2e: + b5:28:5f:83 -----BEGIN CERTIFICATE----- MIIDkTCCAnmgAwIBAgIJAJNsnimNN3tmMA0GCSqGSIb3DQEBCwUAMF8xCzAJBgNV BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg From 8e82ac8d2bbee98c1f835fe715d2ea2ad4aa362a Mon Sep 17 00:00:00 2001 From: suntala Date: Tue, 20 Aug 2024 20:40:55 +0200 Subject: [PATCH 24/83] Add comments to the sort by label functions Signed-off-by: suntala --- promql/functions.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/promql/functions.go b/promql/functions.go index e9bfe45f4a..189d67caf5 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -406,17 +406,22 @@ func funcSortDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel // === sort_by_label(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + // First, sort by the full label set. This ensures a consistent ordering in case sorting by the + // labels provided as arguments is not conclusive. slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { return labels.Compare(a.Metric, b.Metric) }) labels := stringSliceFromArgs(args[1:]) + // Next, sort by the labels provided as arguments. slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - // Iterate over each given label + // Iterate over each given label. for _, label := range labels { lv1 := a.Metric.Get(label) lv2 := b.Metric.Get(label) + // If we encounter multiple samples with the same label values, the sorting which was + // performed in the first step will act as a "tie breaker". if lv1 == lv2 { continue } @@ -436,17 +441,22 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode // === sort_by_label_desc(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + // First, sort by the full label set. This ensures a consistent ordering in case sorting by the + // labels provided as arguments is not conclusive. slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { return labels.Compare(b.Metric, a.Metric) }) labels := stringSliceFromArgs(args[1:]) + // Next, sort by the labels provided as arguments. slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - // Iterate over each given label + // Iterate over each given label. for _, label := range labels { lv1 := a.Metric.Get(label) lv2 := b.Metric.Get(label) + // If we encounter multiple samples with the same label values, the sorting which was + // performed in the first step will act as a "tie breaker". if lv1 == lv2 { continue } From 8f828d45c11c2c3555f8aab4798295290ffc3e67 Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Sun, 4 Aug 2024 08:35:09 +0800 Subject: [PATCH 25/83] convert TestNativeHistogram_Sum_Count_Add_AvgOperator into testing framework Signed-off-by: Ziqi Zhao --- model/histogram/float_histogram.go | 14 +- promql/engine_test.go | 211 ------------------ promql/promqltest/test.go | 4 +- .../testdata/native_histograms.test | 36 +++ util/almost/almost.go | 10 +- 5 files changed, 57 insertions(+), 218 deletions(-) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 2a37ea66d4..b3baaffb0d 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -17,6 +17,12 @@ import ( "fmt" "math" "strings" + + "github.com/prometheus/prometheus/util/almost" +) + +const ( + defaultEpsilon = 0.000001 ) // FloatHistogram is similar to Histogram but uses float64 for all @@ -456,8 +462,8 @@ func (h *FloatHistogram) Equals(h2 *FloatHistogram) bool { } if h.Schema != h2.Schema || - math.Float64bits(h.Count) != math.Float64bits(h2.Count) || - math.Float64bits(h.Sum) != math.Float64bits(h2.Sum) { + !almost.Equal(h.Count, h2.Count, defaultEpsilon) || + !almost.Equal(h.Sum, h2.Sum, defaultEpsilon) { return false } @@ -468,7 +474,7 @@ func (h *FloatHistogram) Equals(h2 *FloatHistogram) bool { } if h.ZeroThreshold != h2.ZeroThreshold || - math.Float64bits(h.ZeroCount) != math.Float64bits(h2.ZeroCount) { + !almost.Equal(h.ZeroCount, h2.ZeroCount, defaultEpsilon) { return false } @@ -1310,7 +1316,7 @@ func FloatBucketsMatch(b1, b2 []float64) bool { return false } for i, b := range b1 { - if math.Float64bits(b) != math.Float64bits(b2[i]) { + if !almost.Equal(b, b2[i], defaultEpsilon) { return false } } diff --git a/promql/engine_test.go b/promql/engine_test.go index 8d5f87244c..37cba975b3 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -3097,217 +3097,6 @@ func TestInstantQueryWithRangeVectorSelector(t *testing.T) { } } -func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - cases := []struct { - histograms []histogram.Histogram - expected histogram.FloatHistogram - expectedAvg histogram.FloatHistogram - }{ - { - histograms: []histogram.Histogram{ - { - CounterResetHint: histogram.GaugeType, - Schema: 0, - Count: 25, - Sum: 1234.5, - ZeroThreshold: 0.001, - ZeroCount: 4, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{1, 1, -1, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 2, Length: 2}, - }, - NegativeBuckets: []int64{2, 2, -3, 8}, - }, - { - CounterResetHint: histogram.GaugeType, - Schema: 0, - Count: 41, - Sum: 2345.6, - ZeroThreshold: 0.001, - ZeroCount: 5, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 4}, - {Offset: 0, Length: 0}, - {Offset: 0, Length: 3}, - }, - PositiveBuckets: []int64{1, 2, -2, 1, -1, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 1, Length: 4}, - {Offset: 2, Length: 0}, - {Offset: 2, Length: 3}, - }, - NegativeBuckets: []int64{1, 3, -2, 5, -2, 0, -3}, - }, - { - CounterResetHint: histogram.GaugeType, - Schema: 0, - Count: 41, - Sum: 1111.1, - ZeroThreshold: 0.001, - ZeroCount: 5, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 4}, - {Offset: 0, Length: 0}, - {Offset: 0, Length: 3}, - }, - PositiveBuckets: []int64{1, 2, -2, 1, -1, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 1, Length: 4}, - {Offset: 2, Length: 0}, - {Offset: 2, Length: 3}, - }, - NegativeBuckets: []int64{1, 3, -2, 5, -2, 0, -3}, - }, - { - CounterResetHint: histogram.GaugeType, - Schema: 1, // Everything is 0 just to make the count 4 so avg has nicer numbers. - }, - }, - expected: histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 0, - ZeroThreshold: 0.001, - ZeroCount: 14, - Count: 107, - Sum: 4691.2, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 7}, - }, - PositiveBuckets: []float64{3, 8, 2, 5, 3, 2, 2}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 6}, - {Offset: 3, Length: 3}, - }, - NegativeBuckets: []float64{2, 6, 8, 4, 15, 9, 10, 10, 4}, - }, - expectedAvg: histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 0, - ZeroThreshold: 0.001, - ZeroCount: 3.5, - Count: 26.75, - Sum: 1172.8, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 7}, - }, - PositiveBuckets: []float64{0.75, 2, 0.5, 1.25, 0.75, 0.5, 0.5}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 6}, - {Offset: 3, Length: 3}, - }, - NegativeBuckets: []float64{0.5, 1.5, 2, 1, 3.75, 2.25, 2.5, 2.5, 1}, - }, - }, - } - - idx0 := int64(0) - for _, c := range cases { - for _, floatHisto := range []bool{true, false} { - t.Run(fmt.Sprintf("floatHistogram=%t %d", floatHisto, idx0), func(t *testing.T) { - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - seriesNameOverTime := "sparse_histogram_series_over_time" - - engine := newTestEngine() - - ts := idx0 * int64(10*time.Minute/time.Millisecond) - app := storage.Appender(context.Background()) - _, err := app.Append(0, labels.FromStrings("__name__", "float_series", "idx", "0"), ts, 42) - require.NoError(t, err) - for idx1, h := range c.histograms { - lbls := labels.FromStrings("__name__", seriesName, "idx", strconv.Itoa(idx1)) - // Since we mutate h later, we need to create a copy here. - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, h.Copy().ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, h.Copy(), nil) - } - require.NoError(t, err) - - lbls = labels.FromStrings("__name__", seriesNameOverTime) - newTs := ts + int64(idx1)*int64(time.Minute/time.Millisecond) - // Since we mutate h later, we need to create a copy here. - if floatHisto { - _, err = app.AppendHistogram(0, lbls, newTs, nil, h.Copy().ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, newTs, h.Copy(), nil) - } - require.NoError(t, err) - } - require.NoError(t, app.Commit()) - - queryAndCheck := func(queryString string, ts int64, exp promql.Vector) { - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - require.Empty(t, res.Warnings) - - vector, err := res.Vector() - require.NoError(t, err) - - testutil.RequireEqual(t, exp, vector) - } - queryAndCheckAnnotations := func(queryString string, ts int64, expWarnings annotations.Annotations) { - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - require.Equal(t, expWarnings, res.Warnings) - } - - // sum(). - queryString := fmt.Sprintf("sum(%s)", seriesName) - queryAndCheck(queryString, ts, []promql.Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) - - queryString = `sum({idx="0"})` - var annos annotations.Annotations - annos.Add(annotations.NewMixedFloatsHistogramsAggWarning(posrange.PositionRange{Start: 4, End: 13})) - queryAndCheckAnnotations(queryString, ts, annos) - - // + operator. - queryString = fmt.Sprintf(`%s{idx="0"}`, seriesName) - for idx := 1; idx < len(c.histograms); idx++ { - queryString += fmt.Sprintf(` + ignoring(idx) %s{idx="%d"}`, seriesName, idx) - } - queryAndCheck(queryString, ts, []promql.Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) - - // count(). - queryString = fmt.Sprintf("count(%s)", seriesName) - queryAndCheck(queryString, ts, []promql.Sample{{T: ts, F: 4, Metric: labels.EmptyLabels()}}) - - // avg(). - queryString = fmt.Sprintf("avg(%s)", seriesName) - queryAndCheck(queryString, ts, []promql.Sample{{T: ts, H: &c.expectedAvg, Metric: labels.EmptyLabels()}}) - - offset := int64(len(c.histograms) - 1) - newTs := ts + offset*int64(time.Minute/time.Millisecond) - - // sum_over_time(). - queryString = fmt.Sprintf("sum_over_time(%s[%dm:1m])", seriesNameOverTime, offset) - queryAndCheck(queryString, newTs, []promql.Sample{{T: newTs, H: &c.expected, Metric: labels.EmptyLabels()}}) - - // avg_over_time(). - queryString = fmt.Sprintf("avg_over_time(%s[%dm:1m])", seriesNameOverTime, offset) - queryAndCheck(queryString, newTs, []promql.Sample{{T: newTs, H: &c.expectedAvg, Metric: labels.EmptyLabels()}}) - }) - idx0++ - } - } -} - func TestNativeHistogram_SubOperator(t *testing.T) { // TODO(codesome): Integrate histograms into the PromQL testing framework // and write more tests there. diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 8b1ec381ad..de67cae837 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -769,7 +769,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { return fmt.Errorf("expected histogram value at index %v for %s to have timestamp %v, but it had timestamp %v (result has %s)", i, ev.metrics[hash], expected.T, actual.T, formatSeriesResult(s)) } - if !actual.H.Equals(expected.H.Compact(0)) { + if !actual.H.Compact(0).Equals(expected.H.Compact(0)) { return fmt.Errorf("expected histogram value at index %v (t=%v) for %s to be %v, but got %v (result has %s)", i, actual.T, ev.metrics[hash], expected.H, actual.H, formatSeriesResult(s)) } } @@ -804,7 +804,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { if expH != nil && v.H == nil { return fmt.Errorf("expected histogram %s for %s but got float value %v", HistogramTestExpression(expH), v.Metric, v.F) } - if expH != nil && !expH.Compact(0).Equals(v.H) { + if expH != nil && !expH.Compact(0).Equals(v.H.Compact(0)) { return fmt.Errorf("expected %v for %s but got %s", HistogramTestExpression(expH), v.Metric, HistogramTestExpression(v.H)) } if !almost.Equal(exp0.Value, v.F, defaultEpsilon) { diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index ea838f1e32..110fd69e7e 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -886,3 +886,39 @@ eval_warn instant at 0 sum by (group) (metric) {group="just-floats"} 5 {group="just-exponential-histograms"} {{sum:5 count:7 buckets:[2 3 2]}} {group="just-custom-histograms"} {{schema:-53 sum:4 count:5 custom_values:[2] buckets:[8]}} + +clear + +# Test native histograms with sum, count, avg. +load 10m + histogram_sum{idx="0"} {{schema:0 count:25 sum:1234.5 z_bucket:4 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[2 4 0 0 1 9]}}x1 + histogram_sum{idx="1"} {{schema:0 count:41 sum:2345.6 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}}x1 + histogram_sum{idx="2"} {{schema:0 count:41 sum:1111.1 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}}x1 + histogram_sum{idx="3"} {{schema:1 count:0}}x1 + histogram_sum_float{idx="0"} 42.0x1 + +eval instant at 10m sum(histogram_sum) + {} {{schema:0 count:107 sum:4691.2 z_bucket:14 z_bucket_w:0.001 buckets:[3 8 2 5 3 2 2] n_buckets:[2 6 8 4 15 9 0 0 0 10 10 4]}} + +eval_warn instant at 10m sum({idx="0"}) + +eval instant at 10m sum(histogram_sum{idx="0"} + ignoring(idx) histogram_sum{idx="1"} + ignoring(idx) histogram_sum{idx="2"} + ignoring(idx) histogram_sum{idx="3"}) + {} {{schema:0 count:107 sum:4691.2 z_bucket:14 z_bucket_w:0.001 buckets:[3 8 2 5 3 2 2] n_buckets:[2 6 8 4 15 9 0 0 0 10 10 4]}} + +eval instant at 10m count(histogram_sum) + {} 4 + +eval instant at 10m avg(histogram_sum) + {} {{schema:0 count:26.75 sum:1172.8 z_bucket:3.5 z_bucket_w:0.001 buckets:[0.75 2 0.5 1.25 0.75 0.5 0.5] n_buckets:[0.5 1.5 2 1 3.75 2.25 0 0 0 2.5 2.5 1]}} + +clear + +# Test native histograms with sum_over_time, avg_over_time. +load 1m + histogram_sum_over_time {{schema:0 count:25 sum:1234.5 z_bucket:4 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[2 4 0 0 1 9]}} {{schema:0 count:41 sum:2345.6 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}} {{schema:0 count:41 sum:1111.1 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}} {{schema:1 count:0}} + +eval instant at 3m sum_over_time(histogram_sum_over_time[3m:1m]) + {} {{schema:0 count:107 sum:4691.2 z_bucket:14 z_bucket_w:0.001 buckets:[3 8 2 5 3 2 2] n_buckets:[2 6 8 4 15 9 0 0 0 10 10 4]}} + +eval instant at 3m avg_over_time(histogram_sum_over_time[3m:1m]) + {} {{schema:0 count:26.75 sum:1172.8 z_bucket:3.5 z_bucket_w:0.001 buckets:[0.75 2 0.5 1.25 0.75 0.5 0.5] n_buckets:[0.5 1.5 2 1 3.75 2.25 0 0 0 2.5 2.5 1]}} diff --git a/util/almost/almost.go b/util/almost/almost.go index 34f1290a5f..1803c4b430 100644 --- a/util/almost/almost.go +++ b/util/almost/almost.go @@ -13,13 +13,21 @@ package almost -import "math" +import ( + "math" + + "github.com/prometheus/prometheus/model/value" +) var minNormal = math.Float64frombits(0x0010000000000000) // The smallest positive normal value of type float64. // Equal returns true if a and b differ by less than their sum // multiplied by epsilon. func Equal(a, b, epsilon float64) bool { + if value.IsStaleNaN(a) || value.IsStaleNaN(b) { + return value.IsStaleNaN(a) && value.IsStaleNaN(b) + } + // NaN has no equality but for testing we still want to know whether both values // are NaN. if math.IsNaN(a) && math.IsNaN(b) { From b2712ff284b5193698f68f475e7e237e714c51bb Mon Sep 17 00:00:00 2001 From: shandongzhejiang Date: Wed, 21 Aug 2024 11:09:37 +0800 Subject: [PATCH 26/83] chore: fix some function names Signed-off-by: shandongzhejiang --- tsdb/agent/db.go | 2 +- tsdb/chunks/head_chunks_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 1b6df3af0f..9697739e00 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -1118,7 +1118,7 @@ func (a *appender) logSeries() error { return nil } -// mintTs returns the minimum timestamp that a sample can have +// minValidTime returns the minimum timestamp that a sample can have // and is needed for preventing underflow. func (a *appender) minValidTime(lastTs int64) int64 { if lastTs < math.MinInt64+a.opts.OutOfOrderTimeWindow { diff --git a/tsdb/chunks/head_chunks_test.go b/tsdb/chunks/head_chunks_test.go index 4a4d89e811..b2aa39c3b0 100644 --- a/tsdb/chunks/head_chunks_test.go +++ b/tsdb/chunks/head_chunks_test.go @@ -408,7 +408,7 @@ func TestChunkDiskMapper_Truncate_WriteQueueRaceCondition(t *testing.T) { wg.Wait() } -// TestHeadReadWriter_TruncateAfterIterateChunksError tests for +// TestHeadReadWriter_TruncateAfterFailedIterateChunks tests for // https://github.com/prometheus/prometheus/issues/7753 func TestHeadReadWriter_TruncateAfterFailedIterateChunks(t *testing.T) { hrw := createChunkDiskMapper(t, "") From 21106611217feefa32bb767f938be73c946700e5 Mon Sep 17 00:00:00 2001 From: cuishuang Date: Wed, 21 Aug 2024 18:15:25 +0800 Subject: [PATCH 27/83] fix: fix slice init length Signed-off-by: cuishuang --- storage/remote/queue_manager_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index 1c06173a59..032a1a92f7 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -930,7 +930,7 @@ func createHistograms(numSamples, numSeries int, floatHistogram bool) ([]record. } func createSeriesMetadata(series []record.RefSeries) []record.RefMetadata { - metas := make([]record.RefMetadata, len(series)) + metas := make([]record.RefMetadata, 0, len(series)) for _, s := range series { metas = append(metas, record.RefMetadata{ From 9a74d53935db7fc847bb87e00d61ca112dff409e Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Wed, 21 Aug 2024 14:24:20 +0100 Subject: [PATCH 28/83] [BUGFIX] TSDB: Fix query overlapping in-order and ooo head (#14693) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * tsdb: Unit test query overlapping in order and ooo head Signed-off-by: György Krajcsovits * TSDB: Merge overlapping head chunk The basic idea is that getOOOSeriesChunks can populate Meta.Chunk, but since it only returns one Meta per overlapping time-slot, that pointer may end up in a Meta with a head-chunk ID. So we need HeadAndOOOChunkReader.ChunkOrIterable() to call mergedChunks in that case. Previously, mergedChunks was checking that meta.Ref was a valid OOO chunk reference, but it never actually uses that reference; it just finds all chunks overlapping in time. So we can delete that code. Signed-off-by: Bryan Boreham Co-authored-by: György Krajcsovits --- tsdb/db_test.go | 34 ++++++++++++++++++++++++++++------ tsdb/head_read.go | 23 ++--------------------- tsdb/ooo_head_read.go | 6 +++++- 3 files changed, 35 insertions(+), 28 deletions(-) diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 904fdeffcb..6d266e8724 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -5042,10 +5042,16 @@ func Test_Querier_OOOQuery(t *testing.T) { series1 := labels.FromStrings("foo", "bar1") minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } - addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample) ([]chunks.Sample, int) { + addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter func(int64) bool) ([]chunks.Sample, int) { + if filter == nil { + filter = func(int64) bool { return true } + } app := db.Appender(context.Background()) totalAppended := 0 for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() { + if !filter(m / time.Minute.Milliseconds()) { + continue + } _, err := app.Append(0, series1, m, float64(m)) if m >= queryMinT && m <= queryMaxT { expSamples = append(expSamples, sample{t: m, f: float64(m)}) @@ -5084,6 +5090,15 @@ func Test_Querier_OOOQuery(t *testing.T) { oooMinT: minutes(0), oooMaxT: minutes(99), }, + { + name: "query overlapping inorder and ooo samples returns all ingested samples", + queryMinT: minutes(0), + queryMaxT: minutes(200), + inOrderMinT: minutes(100), + inOrderMaxT: minutes(200), + oooMinT: minutes(180 - opts.OutOfOrderCapMax/2), // Make sure to fit into the OOO head. + oooMaxT: minutes(180), + }, } for _, tc := range tests { t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { @@ -5093,13 +5108,20 @@ func Test_Querier_OOOQuery(t *testing.T) { require.NoError(t, db.Close()) }() - var expSamples []chunks.Sample + var ( + expSamples []chunks.Sample + inoSamples int + ) - // Add in-order samples. - expSamples, _ = addSample(db, tc.inOrderMinT, tc.inOrderMaxT, tc.queryMinT, tc.queryMaxT, expSamples) + // Add in-order samples (at even minutes). + expSamples, inoSamples = addSample(db, tc.inOrderMinT, tc.inOrderMaxT, tc.queryMinT, tc.queryMaxT, expSamples, func(t int64) bool { return t%2 == 0 }) + // Sanity check that filter is not too zealous. + require.Positive(t, inoSamples, 0) - // Add out-of-order samples. - expSamples, oooSamples := addSample(db, tc.oooMinT, tc.oooMaxT, tc.queryMinT, tc.queryMaxT, expSamples) + // Add out-of-order samples (at odd minutes). + expSamples, oooSamples := addSample(db, tc.oooMinT, tc.oooMaxT, tc.queryMinT, tc.queryMaxT, expSamples, func(t int64) bool { return t%2 == 1 }) + // Sanity check that filter is not too zealous. + require.Positive(t, oooSamples, 0) sort.Slice(expSamples, func(i, j int) bool { return expSamples[i].T() < expSamples[j].T() diff --git a/tsdb/head_read.go b/tsdb/head_read.go index 47f12df994..5e3a76273a 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -481,31 +481,12 @@ func (s *memSeries) chunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDi return elem, true, offset == 0, nil } -// mergedChunks return an iterable over one or more OOO chunks for the given -// chunks.Meta reference from memory or by m-mapping it from the disk. The -// returned iterable will be a merge of all the overlapping chunks, if any, -// amongst all the chunks in the OOOHead. +// mergedChunks return an iterable over all chunks that overlap the +// time window [mint,maxt], plus meta.Chunk if populated. // If hr is non-nil then in-order chunks are included. // This function is not thread safe unless the caller holds a lock. // The caller must ensure that s.ooo is not nil. func (s *memSeries) mergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMapper, hr *headChunkReader, mint, maxt int64, maxMmapRef chunks.ChunkDiskMapperRef) (chunkenc.Iterable, error) { - _, cid, _ := unpackHeadChunkRef(meta.Ref) - - // ix represents the index of chunk in the s.mmappedChunks slice. The chunk meta's are - // incremented by 1 when new chunk is created, hence (meta - firstChunkID) gives the slice index. - // The max index for the s.mmappedChunks slice can be len(s.mmappedChunks)-1, hence if the ix - // is len(s.mmappedChunks), it represents the next chunk, which is the head chunk. - ix := int(cid) - int(s.ooo.firstOOOChunkID) - if ix < 0 || ix > len(s.ooo.oooMmappedChunks) { - return nil, storage.ErrNotFound - } - - if ix == len(s.ooo.oooMmappedChunks) { - if s.ooo.oooHeadChunk == nil { - return nil, errors.New("invalid ooo head chunk") - } - } - // We create a temporary slice of chunk metas to hold the information of all // possible chunks that may overlap with the requested chunk. tmpChks := make([]chunkMetaAndChunkDiskMapperRef, 0, len(s.ooo.oooMmappedChunks)+1) diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go index 55e241fd90..cc98df4729 100644 --- a/tsdb/ooo_head_read.go +++ b/tsdb/ooo_head_read.go @@ -242,7 +242,7 @@ func NewHeadAndOOOChunkReader(head *Head, mint, maxt int64, cr *headChunkReader, func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error) { sid, _, isOOO := unpackHeadChunkRef(meta.Ref) - if !isOOO { + if !isOOO && meta.Chunk == nil { // meta.Chunk can have a copy of OOO head samples, even on non-OOO chunk ID. return cr.cr.ChunkOrIterable(meta) } @@ -253,6 +253,10 @@ func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chu } s.Lock() + if s.ooo == nil { // Must have s.ooo non-nil to call mergedChunks(). + s.Unlock() + return cr.cr.ChunkOrIterable(meta) + } mc, err := s.mergedChunks(meta, cr.head.chunkDiskMapper, cr.cr, cr.mint, cr.maxt, cr.maxMmapRef) s.Unlock() From b50c5d42feb66dfb74d70fcdc0eac9d1a15006e2 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 21 Aug 2024 15:22:38 +0200 Subject: [PATCH 29/83] OTLP receiver: Warn when encountering invalid exponential histograms Signed-off-by: Arve Knudsen --- CHANGELOG.md | 1 + .../prometheusremotewrite/histograms.go | 21 ++++++---- .../prometheusremotewrite/metrics_to_prw.go | 11 ++++-- .../metrics_to_prw_test.go | 39 ++++++++++++++++++- storage/remote/write_handler.go | 9 ++++- 5 files changed, 67 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0036a4a762..6fa8410c98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## unreleased * [FEATURE] OTLP receiver: Add new option `otlp.promote_resource_attributes`, for any OTel resource attributes that should be promoted to metric labels. #14200 +* [ENHANCEMENT] OTLP receiver: Warn when encountering exponential histograms with zero count and non-zero sum. #14706 * [BUGFIX] tsdb/wlog.Watcher.readSegmentForGC: Only count unknown record types against record_decode_failures_total metric. #14042 ## 2.54.0-rc.1 / 2024-08-05 diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go index 73528019d8..ec93387fc6 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go @@ -26,6 +26,7 @@ import ( "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/prompb" + "github.com/prometheus/prometheus/util/annotations" ) const defaultZeroThreshold = 1e-128 @@ -33,13 +34,15 @@ const defaultZeroThreshold = 1e-128 // addExponentialHistogramDataPoints adds OTel exponential histogram data points to the corresponding time series // as native histogram samples. func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice, - resource pcommon.Resource, settings Settings, promName string) error { + resource pcommon.Resource, settings Settings, promName string) (annotations.Annotations, error) { + var annots annotations.Annotations for x := 0; x < dataPoints.Len(); x++ { pt := dataPoints.At(x) - histogram, err := exponentialToNativeHistogram(pt) + histogram, ws, err := exponentialToNativeHistogram(pt) + annots.Merge(ws) if err != nil { - return err + return annots, err } lbls := createAttributes( @@ -58,15 +61,16 @@ func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetr ts.Exemplars = append(ts.Exemplars, exemplars...) } - return nil + return annots, nil } // exponentialToNativeHistogram translates OTel Exponential Histogram data point // to Prometheus Native Histogram. -func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, error) { +func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, annotations.Annotations, error) { + var annots annotations.Annotations scale := p.Scale() if scale < -4 { - return prompb.Histogram{}, + return prompb.Histogram{}, annots, fmt.Errorf("cannot convert exponential to native histogram."+ " Scale must be >= -4, was %d", scale) } @@ -114,8 +118,11 @@ func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prom h.Sum = p.Sum() } h.Count = &prompb.Histogram_CountInt{CountInt: p.Count()} + if p.Count() == 0 && h.Sum != 0 { + annots.Add(fmt.Errorf("exponential histogram data point has zero count, but non-zero sum: %f", h.Sum)) + } } - return h, nil + return h, annots, nil } // convertBucketsLayout translates OTel Exponential Histogram dense buckets diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index a3a7897232..9d76800809 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -27,6 +27,7 @@ import ( "github.com/prometheus/prometheus/prompb" prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" + "github.com/prometheus/prometheus/util/annotations" ) type Settings struct { @@ -53,7 +54,7 @@ func NewPrometheusConverter() *PrometheusConverter { } // FromMetrics converts pmetric.Metrics to Prometheus remote write format. -func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (errs error) { +func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (annots annotations.Annotations, errs error) { resourceMetricsSlice := md.ResourceMetrics() for i := 0; i < resourceMetricsSlice.Len(); i++ { resourceMetrics := resourceMetricsSlice.At(i) @@ -107,12 +108,14 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) break } - errs = multierr.Append(errs, c.addExponentialHistogramDataPoints( + ws, err := c.addExponentialHistogramDataPoints( dataPoints, resource, settings, promName, - )) + ) + annots.Merge(ws) + errs = multierr.Append(errs, err) case pmetric.MetricTypeSummary: dataPoints := metric.Summary().DataPoints() if dataPoints.Len() == 0 { @@ -128,7 +131,7 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) addResourceTargetInfo(resource, settings, mostRecentTimestamp, c) } - return + return annots, errs } func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool { diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go index 37ac677747..bdc1c9d0b2 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go @@ -27,6 +27,41 @@ import ( "go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp" ) +func TestFromMetrics(t *testing.T) { + t.Run("exponential histogram warnings for zero count and non-zero sum", func(t *testing.T) { + request := pmetricotlp.NewExportRequest() + rm := request.Metrics().ResourceMetrics().AppendEmpty() + generateAttributes(rm.Resource().Attributes(), "resource", 10) + + metrics := rm.ScopeMetrics().AppendEmpty().Metrics() + ts := pcommon.NewTimestampFromTime(time.Now()) + + for i := 1; i <= 10; i++ { + m := metrics.AppendEmpty() + m.SetEmptyExponentialHistogram() + m.SetName(fmt.Sprintf("histogram-%d", i)) + m.ExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + h := m.ExponentialHistogram().DataPoints().AppendEmpty() + h.SetTimestamp(ts) + + h.SetCount(0) + h.SetSum(155) + + generateAttributes(h.Attributes(), "series", 10) + } + + converter := NewPrometheusConverter() + annots, err := converter.FromMetrics(request.Metrics(), Settings{}) + require.NoError(t, err) + require.NotEmpty(t, annots) + ws, infos := annots.AsStrings("", 0, 0) + require.Empty(t, infos) + require.Equal(t, []string{ + "exponential histogram data point has zero count, but non-zero sum: 155.000000", + }, ws) + }) +} + func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) { for _, resourceAttributeCount := range []int{0, 5, 50} { b.Run(fmt.Sprintf("resource attribute count: %v", resourceAttributeCount), func(b *testing.B) { @@ -49,7 +84,9 @@ func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) { for i := 0; i < b.N; i++ { converter := NewPrometheusConverter() - require.NoError(b, converter.FromMetrics(payload.Metrics(), Settings{})) + annots, err := converter.FromMetrics(payload.Metrics(), Settings{}) + require.NoError(b, err) + require.Empty(b, annots) require.NotNil(b, converter.TimeSeries()) } }) diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index aba79a561d..8ac7590465 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -502,12 +502,17 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { otlpCfg := h.configFunc().OTLPConfig converter := otlptranslator.NewPrometheusConverter() - if err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{ + annots, err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{ AddMetricSuffixes: true, PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes, - }); err != nil { + }) + if err != nil { level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err) } + ws, _ := annots.AsStrings("", 0, 0) + if len(ws) > 0 { + level.Warn(h.logger).Log("msg", "Warnings translating OTLP metrics to Prometheus write request", "warnings", ws) + } err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{ Timeseries: converter.TimeSeries(), From fbcd50f32c7a5f8f040cb4c9a4335f821a263f18 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 21 Aug 2024 18:55:28 +0200 Subject: [PATCH 30/83] Upgrade golangci-lint to v1.60.2 Signed-off-by: Arve Knudsen --- .github/workflows/ci.yml | 2 +- Makefile.common | 2 +- scripts/golangci-lint.yml | 2 +- template/template.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c3a1d68e98..c89c9507bb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -186,7 +186,7 @@ jobs: with: args: --verbose # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml. - version: v1.60.1 + version: v1.60.2 fuzzing: uses: ./.github/workflows/fuzzing.yml if: github.event_name == 'pull_request' diff --git a/Makefile.common b/Makefile.common index 2ecd5465c3..34d65bb56d 100644 --- a/Makefile.common +++ b/Makefile.common @@ -61,7 +61,7 @@ PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_ SKIP_GOLANGCI_LINT := GOLANGCI_LINT := GOLANGCI_LINT_OPTS ?= -GOLANGCI_LINT_VERSION ?= v1.60.1 +GOLANGCI_LINT_VERSION ?= v1.60.2 # golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64. # windows isn't included here because of the path separator being different. ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin)) diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index fc0f9c6543..f4a7385bb5 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -36,4 +36,4 @@ jobs: uses: golangci/golangci-lint-action@aaa42aa0628b4ae2578232a66b541047968fac86 # v6.1.0 with: args: --verbose - version: v1.60.1 + version: v1.60.2 diff --git a/template/template.go b/template/template.go index 9ffed6ff61..c507dbe746 100644 --- a/template/template.go +++ b/template/template.go @@ -166,7 +166,7 @@ func NewTemplateExpander( return html_template.HTML(text) }, "match": regexp.MatchString, - "title": strings.Title, + "title": strings.Title, //nolint:staticcheck "toUpper": strings.ToUpper, "toLower": strings.ToLower, "graphLink": strutil.GraphLinkForExpression, From 0f760f63dd5137ad406e0bc9f08676898e04ac97 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Thu, 22 Aug 2024 13:59:36 +0200 Subject: [PATCH 31/83] lint: Revamp our linting rules, mostly around doc comments Several things done here: - Set `max-issues-per-linter` to 0 so that we actually see all linter warnings and not just 50 per linter. (As we also set `max-same-issues` to 0, I assume this was the intention from the beginning.) - Stop using the golangci-lint default excludes (by setting `exclude-use-default: false`. Those are too generous and don't match our style conventions. (I have re-added some of the excludes explicitly in this commit. See below.) - Re-add the `errcheck` exclusion we have used so far via the defaults. - Exclude the signature requirement `govet` has for `Seek` methods because we use non-standard `Seek` methods a lot. (But we keep other requirements, while the default excludes completely disabled the check for common method segnatures.) - Exclude warnings about missing doc comments on exported symbols. (We used to be pretty adamant about doc comments, but stopped that at some point in the past. By now, we have about 500 missing doc comments. We may consider reintroducing this check, but that's outside of the scope of this commit. The default excludes of golangci-lint essentially ignore doc comments completely.) - By stop using the default excludes, we now get warnings back on malformed doc comments. That's the most impactful change in this commit. It does not enforce doc comments (again), but _if_ there is a doc comment, it has to have the recommended form. (Most of the changes in this commit are fixing this form.) - Improve wording/spelling of some comments in .golangci.yml, and remove an outdated comment. - Leave `package-comments` inactive, but add a TODO asking if we should change that. - Add a new sub-linter `comment-spacings` (and fix corresponding comments), which avoids missing spaces after the leading `//`. Signed-off-by: beorn7 --- .golangci.yml | 30 +++++++++++++++++++++++----- cmd/promtool/main.go | 2 +- cmd/promtool/metrics.go | 2 +- discovery/discoverer_metrics_noop.go | 2 +- discovery/discovery.go | 22 ++++++++++---------- discovery/manager.go | 2 +- discovery/metrics_refresh.go | 2 +- discovery/util.go | 6 +++--- model/exemplar/exemplar.go | 6 ++++-- model/labels/labels.go | 11 +++++----- model/textparse/interface.go | 4 ++-- promql/engine.go | 6 +++--- promql/parser/ast.go | 5 ++--- scrape/clientprotobuf.go | 4 ++-- storage/interface.go | 2 +- storage/remote/azuread/azuread.go | 6 ++++-- template/template.go | 2 +- tsdb/chunks/head_chunks.go | 4 ++-- tsdb/db.go | 2 +- tsdb/encoding/encoding.go | 4 ++-- tsdb/head_other.go | 2 +- tsdb/index/index.go | 5 +++-- tsdb/ooo_head_read.go | 4 ++-- tsdb/wlog/watcher.go | 11 +++++----- tsdb/wlog/wlog.go | 4 ++-- util/annotations/annotations.go | 2 +- util/testutil/cmp.go | 5 +++-- 27 files changed, 93 insertions(+), 64 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index e924fe3d5b..303cd33d8b 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -25,15 +25,34 @@ linters: - loggercheck issues: + max-issues-per-linter: 0 max-same-issues: 0 + # The default exclusions are too aggressive. For one, they + # essentially disable any linting on doc comments. We disable + # default exclusions here and add exclusions fitting our codebase + # further down. + exclude-use-default: false exclude-files: # Skip autogenerated files. - ^.*\.(pb|y)\.go$ exclude-dirs: - # Copied it from a different source + # Copied it from a different source. - storage/remote/otlptranslator/prometheusremotewrite - storage/remote/otlptranslator/prometheus exclude-rules: + - linters: + - errcheck + # Taken from the default exclusions (that are otherwise disabled above). + text: Error return value of .((os\.)?std(out|err)\..*|.*Close|.*Flush|os\.Remove(All)?|.*print(f|ln)?|os\.(Un)?Setenv). is not checked + - linters: + - govet + # We use many Seek methods that do not follow the usual pattern. + text: "stdmethods: method Seek.* should have signature Seek" + - linters: + - revive + # We have stopped at some point to write doc comments on exported symbols. + # TODO(beorn7): Maybe we should enforce this again? There are ~500 offenders right now. + text: exported (.+) should have comment( \(or a comment on this block\))? or be unexported - linters: - gocritic text: "appendAssign" @@ -94,15 +113,14 @@ linters-settings: errorf: false revive: # By default, revive will enable only the linting rules that are named in the configuration file. - # So, it's needed to explicitly set in configuration all required rules. - # The following configuration enables all the rules from the defaults.toml - # https://github.com/mgechev/revive/blob/master/defaults.toml + # So, it's needed to explicitly enable all required rules here. rules: # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md - name: blank-imports + - name: comment-spacings - name: context-as-argument arguments: - # allow functions with test or bench signatures + # Allow functions with test or bench signatures. - allowTypesBefore: "*testing.T,testing.TB" - name: context-keys-type - name: dot-imports @@ -118,6 +136,8 @@ linters-settings: - name: increment-decrement - name: indent-error-flow - name: package-comments + # TODO(beorn7): Currently, we have a lot of missing package doc comments. Maybe we should have them. + disabled: true - name: range - name: receiver-naming - name: redefines-builtin-id diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 1c8e1dd1c8..4d033c3c09 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -471,7 +471,7 @@ func (ls lintConfig) lintDuplicateRules() bool { return ls.all || ls.duplicateRules } -// Check server status - healthy & ready. +// CheckServerStatus - healthy & ready. func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper http.RoundTripper) error { if serverURL.Scheme == "" { serverURL.Scheme = "http" diff --git a/cmd/promtool/metrics.go b/cmd/promtool/metrics.go index 6d162f459a..4c91d1d6fe 100644 --- a/cmd/promtool/metrics.go +++ b/cmd/promtool/metrics.go @@ -31,7 +31,7 @@ import ( "github.com/prometheus/prometheus/util/fmtutil" ) -// Push metrics to a prometheus remote write (for testing purpose only). +// PushMetrics to a prometheus remote write (for testing purpose only). func PushMetrics(url *url.URL, roundTripper http.RoundTripper, headers map[string]string, timeout time.Duration, labels map[string]string, files ...string) int { addressURL, err := url.Parse(url.String()) if err != nil { diff --git a/discovery/discoverer_metrics_noop.go b/discovery/discoverer_metrics_noop.go index 638317ace1..4321204b6c 100644 --- a/discovery/discoverer_metrics_noop.go +++ b/discovery/discoverer_metrics_noop.go @@ -13,7 +13,7 @@ package discovery -// Create a dummy metrics struct, because this SD doesn't have any metrics. +// NoopDiscovererMetrics creates a dummy metrics struct, because this SD doesn't have any metrics. type NoopDiscovererMetrics struct{} var _ DiscovererMetrics = (*NoopDiscovererMetrics)(nil) diff --git a/discovery/discovery.go b/discovery/discovery.go index a5826f8176..a91faf6c86 100644 --- a/discovery/discovery.go +++ b/discovery/discovery.go @@ -39,7 +39,7 @@ type Discoverer interface { Run(ctx context.Context, up chan<- []*targetgroup.Group) } -// Internal metrics of service discovery mechanisms. +// DiscovererMetrics are internal metrics of service discovery mechanisms. type DiscovererMetrics interface { Register() error Unregister() @@ -56,7 +56,7 @@ type DiscovererOptions struct { HTTPClientOptions []config.HTTPClientOption } -// Metrics used by the "refresh" package. +// RefreshMetrics are used by the "refresh" package. // We define them here in the "discovery" package in order to avoid a cyclic dependency between // "discovery" and "refresh". type RefreshMetrics struct { @@ -64,17 +64,18 @@ type RefreshMetrics struct { Duration prometheus.Observer } -// Instantiate the metrics used by the "refresh" package. +// RefreshMetricsInstantiator instantiates the metrics used by the "refresh" package. type RefreshMetricsInstantiator interface { Instantiate(mech string) *RefreshMetrics } -// An interface for registering, unregistering, and instantiating metrics for the "refresh" package. -// Refresh metrics are registered and unregistered outside of the service discovery mechanism. -// This is so that the same metrics can be reused across different service discovery mechanisms. -// To manage refresh metrics inside the SD mechanism, we'd need to use const labels which are -// specific to that SD. However, doing so would also expose too many unused metrics on -// the Prometheus /metrics endpoint. +// RefreshMetricsManager is an interface for registering, unregistering, and +// instantiating metrics for the "refresh" package. Refresh metrics are +// registered and unregistered outside of the service discovery mechanism. This +// is so that the same metrics can be reused across different service discovery +// mechanisms. To manage refresh metrics inside the SD mechanism, we'd need to +// use const labels which are specific to that SD. However, doing so would also +// expose too many unused metrics on the Prometheus /metrics endpoint. type RefreshMetricsManager interface { DiscovererMetrics RefreshMetricsInstantiator @@ -145,7 +146,8 @@ func (c StaticConfig) NewDiscoverer(DiscovererOptions) (Discoverer, error) { return staticDiscoverer(c), nil } -// No metrics are needed for this service discovery mechanism. +// NewDiscovererMetrics returns NoopDiscovererMetrics because no metrics are +// needed for this service discovery mechanism. func (c StaticConfig) NewDiscovererMetrics(prometheus.Registerer, RefreshMetricsInstantiator) DiscovererMetrics { return &NoopDiscovererMetrics{} } diff --git a/discovery/manager.go b/discovery/manager.go index 897d7d151c..48bea85bb7 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -64,7 +64,7 @@ func (p *Provider) Config() interface{} { return p.config } -// Registers the metrics needed for SD mechanisms. +// CreateAndRegisterSDMetrics registers the metrics needed for SD mechanisms. // Does not register the metrics for the Discovery Manager. // TODO(ptodev): Add ability to unregister the metrics? func CreateAndRegisterSDMetrics(reg prometheus.Registerer) (map[string]DiscovererMetrics, error) { diff --git a/discovery/metrics_refresh.go b/discovery/metrics_refresh.go index d621165ced..ef49e591a3 100644 --- a/discovery/metrics_refresh.go +++ b/discovery/metrics_refresh.go @@ -17,7 +17,7 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -// Metric vectors for the "refresh" package. +// RefreshMetricsVecs are metric vectors for the "refresh" package. // We define them here in the "discovery" package in order to avoid a cyclic dependency between // "discovery" and "refresh". type RefreshMetricsVecs struct { diff --git a/discovery/util.go b/discovery/util.go index 83cc640dd9..4e2a088518 100644 --- a/discovery/util.go +++ b/discovery/util.go @@ -19,8 +19,8 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -// A utility to be used by implementations of discovery.Discoverer -// which need to manage the lifetime of their metrics. +// MetricRegisterer is used by implementations of discovery.Discoverer that need +// to manage the lifetime of their metrics. type MetricRegisterer interface { RegisterMetrics() error UnregisterMetrics() @@ -34,7 +34,7 @@ type metricRegistererImpl struct { var _ MetricRegisterer = &metricRegistererImpl{} -// Creates an instance of a MetricRegisterer. +// NewMetricRegisterer creates an instance of a MetricRegisterer. // Typically called inside the implementation of the NewDiscoverer() method. func NewMetricRegisterer(reg prometheus.Registerer, metrics []prometheus.Collector) MetricRegisterer { return &metricRegistererImpl{ diff --git a/model/exemplar/exemplar.go b/model/exemplar/exemplar.go index 08f55374ef..2c28b17257 100644 --- a/model/exemplar/exemplar.go +++ b/model/exemplar/exemplar.go @@ -15,7 +15,9 @@ package exemplar import "github.com/prometheus/prometheus/model/labels" -// The combined length of the label names and values of an Exemplar's LabelSet MUST NOT exceed 128 UTF-8 characters +// ExemplarMaxLabelSetLength is defined by OpenMetrics: "The combined length of +// the label names and values of an Exemplar's LabelSet MUST NOT exceed 128 +// UTF-8 characters." // https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#exemplars const ExemplarMaxLabelSetLength = 128 @@ -49,7 +51,7 @@ func (e Exemplar) Equals(e2 Exemplar) bool { return e.Value == e2.Value } -// Sort first by timestamp, then value, then labels. +// Compare first timestamps, then values, then labels. func Compare(a, b Exemplar) int { if a.Ts < b.Ts { return -1 diff --git a/model/labels/labels.go b/model/labels/labels.go index cd30f4f8ff..f4de7496ce 100644 --- a/model/labels/labels.go +++ b/model/labels/labels.go @@ -315,7 +315,8 @@ func Compare(a, b Labels) int { return len(a) - len(b) } -// Copy labels from b on top of whatever was in ls previously, reusing memory or expanding if needed. +// CopyFrom copies labels from b on top of whatever was in ls previously, +// reusing memory or expanding if needed. func (ls *Labels) CopyFrom(b Labels) { (*ls) = append((*ls)[:0], b...) } @@ -422,7 +423,7 @@ type ScratchBuilder struct { add Labels } -// Symbol-table is no-op, just for api parity with dedupelabels. +// SymbolTable is no-op, just for api parity with dedupelabels. type SymbolTable struct{} func NewSymbolTable() *SymbolTable { return nil } @@ -458,7 +459,7 @@ func (b *ScratchBuilder) Add(name, value string) { b.add = append(b.add, Label{Name: name, Value: value}) } -// Add a name/value pair, using []byte instead of string. +// UnsafeAddBytes adds a name/value pair, using []byte instead of string. // The '-tags stringlabels' version of this function is unsafe, hence the name. // This version is safe - it copies the strings immediately - but we keep the same name so everything compiles. func (b *ScratchBuilder) UnsafeAddBytes(name, value []byte) { @@ -475,14 +476,14 @@ func (b *ScratchBuilder) Assign(ls Labels) { b.add = append(b.add[:0], ls...) // Copy on top of our slice, so we don't retain the input slice. } -// Return the name/value pairs added so far as a Labels object. +// Labels returns the name/value pairs added so far as a Labels object. // Note: if you want them sorted, call Sort() first. func (b *ScratchBuilder) Labels() Labels { // Copy the slice, so the next use of ScratchBuilder doesn't overwrite. return append([]Label{}, b.add...) } -// Write the newly-built Labels out to ls. +// Overwrite the newly-built Labels out to ls. // Callers must ensure that there are no other references to ls, or any strings fetched from it. func (b *ScratchBuilder) Overwrite(ls *Labels) { *ls = append((*ls)[:0], b.add...) diff --git a/model/textparse/interface.go b/model/textparse/interface.go index df01dbc34f..0b5d9281e4 100644 --- a/model/textparse/interface.go +++ b/model/textparse/interface.go @@ -106,8 +106,8 @@ const ( EntryInvalid Entry = -1 EntryType Entry = 0 EntryHelp Entry = 1 - EntrySeries Entry = 2 // A series with a simple float64 as value. + EntrySeries Entry = 2 // EntrySeries marks a series with a simple float64 as value. EntryComment Entry = 3 EntryUnit Entry = 4 - EntryHistogram Entry = 5 // A series with a native histogram as a value. + EntryHistogram Entry = 5 // EntryHistogram marks a series with a native histogram as a value. ) diff --git a/promql/engine.go b/promql/engine.go index daa46ab6f2..60c8e06394 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -573,7 +573,7 @@ func (ng *Engine) validateOpts(expr parser.Expr) error { return validationErr } -// NewTestQuery: inject special behaviour into Query for testing. +// NewTestQuery injects special behaviour into Query for testing. func (ng *Engine) NewTestQuery(f func(context.Context) error) Query { qry := &query{ q: "test statement", @@ -3531,14 +3531,14 @@ func makeInt64Pointer(val int64) *int64 { return valp } -// Add RatioSampler interface to allow unit-testing (previously: Randomizer). +// RatioSampler allows unit-testing (previously: Randomizer). type RatioSampler interface { // Return this sample "offset" between [0.0, 1.0] sampleOffset(ts int64, sample *Sample) float64 AddRatioSample(r float64, sample *Sample) bool } -// Use Hash(labels.String()) / maxUint64 as a "deterministic" +// HashRatioSampler uses Hash(labels.String()) / maxUint64 as a "deterministic" // value in [0.0, 1.0]. type HashRatioSampler struct{} diff --git a/promql/parser/ast.go b/promql/parser/ast.go index 830e8a2c5e..162d7817ab 100644 --- a/promql/parser/ast.go +++ b/promql/parser/ast.go @@ -352,8 +352,7 @@ func (f inspector) Visit(node Node, path []Node) (Visitor, error) { // f(node, path); node must not be nil. If f returns a nil error, Inspect invokes f // for all the non-nil children of node, recursively. func Inspect(node Node, f inspector) { - //nolint: errcheck - Walk(f, node, nil) + Walk(f, node, nil) //nolint:errcheck } // Children returns a list of all child nodes of a syntax tree node. @@ -419,7 +418,7 @@ func mergeRanges(first, last Node) posrange.PositionRange { } } -// Item implements the Node interface. +// PositionRange implements the Node interface. // This makes it possible to call mergeRanges on them. func (i *Item) PositionRange() posrange.PositionRange { return posrange.PositionRange{ diff --git a/scrape/clientprotobuf.go b/scrape/clientprotobuf.go index 2213268d59..e632035b40 100644 --- a/scrape/clientprotobuf.go +++ b/scrape/clientprotobuf.go @@ -23,7 +23,7 @@ import ( dto "github.com/prometheus/client_model/go" ) -// Write a MetricFamily into a protobuf. +// MetricFamilyToProtobuf writes a MetricFamily into a protobuf. // This function is intended for testing scraping by providing protobuf serialized input. func MetricFamilyToProtobuf(metricFamily *dto.MetricFamily) ([]byte, error) { buffer := &bytes.Buffer{} @@ -34,7 +34,7 @@ func MetricFamilyToProtobuf(metricFamily *dto.MetricFamily) ([]byte, error) { return buffer.Bytes(), nil } -// Append a MetricFamily protobuf representation to a buffer. +// AddMetricFamilyToProtobuf appends a MetricFamily protobuf representation to a buffer. // This function is intended for testing scraping by providing protobuf serialized input. func AddMetricFamilyToProtobuf(buffer *bytes.Buffer, metricFamily *dto.MetricFamily) error { protoBuf, err := proto.Marshal(metricFamily) diff --git a/storage/interface.go b/storage/interface.go index f85f985e9d..2f125e5902 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -227,9 +227,9 @@ type LabelHints struct { Limit int } -// TODO(bwplotka): Move to promql/engine_test.go? // QueryableFunc is an adapter to allow the use of ordinary functions as // Queryables. It follows the idea of http.HandlerFunc. +// TODO(bwplotka): Move to promql/engine_test.go? type QueryableFunc func(mint, maxt int64) (Querier, error) // Querier calls f() with the given parameters. diff --git a/storage/remote/azuread/azuread.go b/storage/remote/azuread/azuread.go index 58520c6a5d..82f46b82f6 100644 --- a/storage/remote/azuread/azuread.go +++ b/storage/remote/azuread/azuread.go @@ -31,13 +31,15 @@ import ( "github.com/google/uuid" ) +// Clouds. const ( - // Clouds. AzureChina = "AzureChina" AzureGovernment = "AzureGovernment" AzurePublic = "AzurePublic" +) - // Audiences. +// Audiences. +const ( IngestionChinaAudience = "https://monitor.azure.cn//.default" IngestionGovernmentAudience = "https://monitor.azure.us//.default" IngestionPublicAudience = "https://monitor.azure.com//.default" diff --git a/template/template.go b/template/template.go index c507dbe746..0698c6c8ac 100644 --- a/template/template.go +++ b/template/template.go @@ -166,7 +166,7 @@ func NewTemplateExpander( return html_template.HTML(text) }, "match": regexp.MatchString, - "title": strings.Title, //nolint:staticcheck + "title": strings.Title, //nolint:staticcheck // TODO(beorn7): Need to come up with a replacement using the cases package. "toUpper": strings.ToUpper, "toLower": strings.ToLower, "graphLink": strutil.GraphLinkForExpression, diff --git a/tsdb/chunks/head_chunks.go b/tsdb/chunks/head_chunks.go index 6c8707c57b..876b42cb26 100644 --- a/tsdb/chunks/head_chunks.go +++ b/tsdb/chunks/head_chunks.go @@ -191,7 +191,7 @@ func (f *chunkPos) bytesToWriteForChunk(chkLen uint64) uint64 { // ChunkDiskMapper is for writing the Head block chunks to disk // and access chunks via mmapped files. type ChunkDiskMapper struct { - /// Writer. + // Writer. dir *os.File writeBufferSize int @@ -210,7 +210,7 @@ type ChunkDiskMapper struct { crc32 hash.Hash writePathMtx sync.Mutex - /// Reader. + // Reader. // The int key in the map is the file number on the disk. mmappedChunkFiles map[int]*mmappedChunkFile // Contains the m-mapped files for each chunk file mapped with its index. closers map[int]io.Closer // Closers for resources behind the byte slices. diff --git a/tsdb/db.go b/tsdb/db.go index 706e5bbac1..a5b3a5e602 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -49,7 +49,7 @@ import ( ) const ( - // Default duration of a block in milliseconds. + // DefaultBlockDuration in milliseconds. DefaultBlockDuration = int64(2 * time.Hour / time.Millisecond) // Block dir suffixes to make deletion and creation operations atomic. diff --git a/tsdb/encoding/encoding.go b/tsdb/encoding/encoding.go index cd98fbd82f..88fdd30c85 100644 --- a/tsdb/encoding/encoding.go +++ b/tsdb/encoding/encoding.go @@ -201,8 +201,8 @@ func (d *Decbuf) UvarintStr() string { return string(d.UvarintBytes()) } -// The return value becomes invalid if the byte slice goes away. -// Compared to UvarintStr, this avoid allocations. +// UvarintBytes returns invalid values if the byte slice goes away. +// Compared to UvarintStr, it avoid allocations. func (d *Decbuf) UvarintBytes() []byte { l := d.Uvarint64() if d.E != nil { diff --git a/tsdb/head_other.go b/tsdb/head_other.go index eb1b93a3e5..fea91530dc 100644 --- a/tsdb/head_other.go +++ b/tsdb/head_other.go @@ -26,7 +26,7 @@ func (s *memSeries) labels() labels.Labels { return s.lset } -// No-op when not using dedupelabels. +// RebuildSymbolTable is a no-op when not using dedupelabels. func (h *Head) RebuildSymbolTable(logger log.Logger) *labels.SymbolTable { return nil } diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 3621054598..0e0e353719 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -196,8 +196,9 @@ func NewTOCFromByteSlice(bs ByteSlice) (*TOC, error) { return toc, d.Err() } -// NewWriter returns a new Writer to the given filename. It serializes data in format version 2. -// It uses the given encoder to encode each postings list. +// NewWriterWithEncoder returns a new Writer to the given filename. It +// serializes data in format version 2. It uses the given encoder to encode each +// postings list. func NewWriterWithEncoder(ctx context.Context, fn string, encoder PostingsEncoder) (*Writer, error) { dir := filepath.Dir(fn) diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go index cc98df4729..f1d06a421b 100644 --- a/tsdb/ooo_head_read.go +++ b/tsdb/ooo_head_read.go @@ -263,8 +263,8 @@ func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chu return nil, mc, err } -// ChunkOrIterableWithCopy: implements ChunkReaderWithCopy. The special Copy behaviour -// is only implemented for the in-order head chunk. +// ChunkOrIterableWithCopy implements ChunkReaderWithCopy. The special Copy +// behaviour is only implemented for the in-order head chunk. func (cr *HeadAndOOOChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) { _, _, isOOO := unpackHeadChunkRef(meta.Ref) if !isOOO { diff --git a/tsdb/wlog/watcher.go b/tsdb/wlog/watcher.go index c1eed78f6d..86e6f9c81d 100644 --- a/tsdb/wlog/watcher.go +++ b/tsdb/wlog/watcher.go @@ -58,15 +58,16 @@ type WriteTo interface { StoreSeries([]record.RefSeries, int) StoreMetadata([]record.RefMetadata) - // Next two methods are intended for garbage-collection: first we call - // UpdateSeriesSegment on all current series + // UpdateSeriesSegment and SeriesReset are intended for + // garbage-collection: + // First we call UpdateSeriesSegment on all current series. UpdateSeriesSegment([]record.RefSeries, int) - // Then SeriesReset is called to allow the deletion - // of all series created in a segment lower than the argument. + // Then SeriesReset is called to allow the deletion of all series + // created in a segment lower than the argument. SeriesReset(int) } -// Used to notify the watcher that data has been written so that it can read. +// WriteNotified notifies the watcher that data has been written so that it can read. type WriteNotified interface { Notify() } diff --git a/tsdb/wlog/wlog.go b/tsdb/wlog/wlog.go index 993e930cef..b14521f358 100644 --- a/tsdb/wlog/wlog.go +++ b/tsdb/wlog/wlog.go @@ -38,8 +38,8 @@ import ( ) const ( - DefaultSegmentSize = 128 * 1024 * 1024 // 128 MB - pageSize = 32 * 1024 // 32KB + DefaultSegmentSize = 128 * 1024 * 1024 // DefaultSegmentSize is 128 MB. + pageSize = 32 * 1024 // pageSize is 32KB. recordHeaderSize = 7 WblDirName = "wbl" ) diff --git a/util/annotations/annotations.go b/util/annotations/annotations.go index bc5d76db43..b0272b7fee 100644 --- a/util/annotations/annotations.go +++ b/util/annotations/annotations.go @@ -174,7 +174,7 @@ func NewInvalidQuantileWarning(q float64, pos posrange.PositionRange) error { } } -// NewInvalidQuantileWarning is used when the user specifies an invalid ratio +// NewInvalidRatioWarning is used when the user specifies an invalid ratio // value, i.e. a float that is outside the range [-1, 1] or NaN. func NewInvalidRatioWarning(q, to float64, pos posrange.PositionRange) error { return annoErr{ diff --git a/util/testutil/cmp.go b/util/testutil/cmp.go index 370d191f3f..24d39d514c 100644 --- a/util/testutil/cmp.go +++ b/util/testutil/cmp.go @@ -23,13 +23,14 @@ import ( "github.com/prometheus/prometheus/model/labels" ) -// Replacement for require.Equal using go-cmp adapted for Prometheus data structures, instead of DeepEqual. +// RequireEqual is a replacement for require.Equal using go-cmp adapted for +// Prometheus data structures, instead of DeepEqual. func RequireEqual(t testing.TB, expected, actual interface{}, msgAndArgs ...interface{}) { t.Helper() RequireEqualWithOptions(t, expected, actual, nil, msgAndArgs...) } -// As RequireEqual but allows extra cmp.Options. +// RequireEqualWithOptions works like RequireEqual but allows extra cmp.Options. func RequireEqualWithOptions(t testing.TB, expected, actual interface{}, extra []cmp.Option, msgAndArgs ...interface{}) { t.Helper() options := append([]cmp.Option{cmp.Comparer(labels.Equal)}, extra...) From c7c4a5c3476c39c4776eb5146ad9c93b85e4fb58 Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Fri, 23 Aug 2024 09:28:31 +0800 Subject: [PATCH 32/83] add helper function to compare native histograms in testing Signed-off-by: Ziqi Zhao --- model/histogram/float_histogram.go | 14 +--- promql/promqltest/test.go | 119 ++++++++++++++++++++++++++++- util/almost/almost.go | 2 + 3 files changed, 123 insertions(+), 12 deletions(-) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index b3baaffb0d..2a37ea66d4 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -17,12 +17,6 @@ import ( "fmt" "math" "strings" - - "github.com/prometheus/prometheus/util/almost" -) - -const ( - defaultEpsilon = 0.000001 ) // FloatHistogram is similar to Histogram but uses float64 for all @@ -462,8 +456,8 @@ func (h *FloatHistogram) Equals(h2 *FloatHistogram) bool { } if h.Schema != h2.Schema || - !almost.Equal(h.Count, h2.Count, defaultEpsilon) || - !almost.Equal(h.Sum, h2.Sum, defaultEpsilon) { + math.Float64bits(h.Count) != math.Float64bits(h2.Count) || + math.Float64bits(h.Sum) != math.Float64bits(h2.Sum) { return false } @@ -474,7 +468,7 @@ func (h *FloatHistogram) Equals(h2 *FloatHistogram) bool { } if h.ZeroThreshold != h2.ZeroThreshold || - !almost.Equal(h.ZeroCount, h2.ZeroCount, defaultEpsilon) { + math.Float64bits(h.ZeroCount) != math.Float64bits(h2.ZeroCount) { return false } @@ -1316,7 +1310,7 @@ func FloatBucketsMatch(b1, b2 []float64) bool { return false } for i, b := range b1 { - if !almost.Equal(b, b2[i], defaultEpsilon) { + if math.Float64bits(b) != math.Float64bits(b2[i]) { return false } } diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index de67cae837..1773b63417 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -769,7 +769,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { return fmt.Errorf("expected histogram value at index %v for %s to have timestamp %v, but it had timestamp %v (result has %s)", i, ev.metrics[hash], expected.T, actual.T, formatSeriesResult(s)) } - if !actual.H.Compact(0).Equals(expected.H.Compact(0)) { + if !compareNativeHistogram(expected.H.Compact(0), actual.H.Compact(0)) { return fmt.Errorf("expected histogram value at index %v (t=%v) for %s to be %v, but got %v (result has %s)", i, actual.T, ev.metrics[hash], expected.H, actual.H, formatSeriesResult(s)) } } @@ -804,7 +804,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { if expH != nil && v.H == nil { return fmt.Errorf("expected histogram %s for %s but got float value %v", HistogramTestExpression(expH), v.Metric, v.F) } - if expH != nil && !expH.Compact(0).Equals(v.H.Compact(0)) { + if expH != nil && !compareNativeHistogram(expH.Compact(0), v.H.Compact(0)) { return fmt.Errorf("expected %v for %s but got %s", HistogramTestExpression(expH), v.Metric, HistogramTestExpression(v.H)) } if !almost.Equal(exp0.Value, v.F, defaultEpsilon) { @@ -837,6 +837,121 @@ func (ev *evalCmd) compareResult(result parser.Value) error { return nil } +// compareNativeHistogram is helper function to compare two native histograms +// which can tolerate some differ in the field of float type, such as Count, Sum +func compareNativeHistogram(exp, cur *histogram.FloatHistogram) bool { + if exp == nil || cur == nil { + return false + } + + if exp.Schema != cur.Schema || + !almost.Equal(exp.Count, cur.Count, defaultEpsilon) || + !almost.Equal(exp.Sum, cur.Sum, defaultEpsilon) { + return false + } + + if exp.UsesCustomBuckets() { + if !histogram.FloatBucketsMatch(exp.CustomValues, cur.CustomValues) { + return false + } + } + + if exp.ZeroThreshold != cur.ZeroThreshold || + !almost.Equal(exp.ZeroCount, cur.ZeroCount, defaultEpsilon) { + return false + } + + if !spansMatch(exp.NegativeSpans, cur.NegativeSpans) { + return false + } + if !floatBucketsMatch(exp.NegativeBuckets, cur.NegativeBuckets) { + return false + } + + if !spansMatch(exp.PositiveSpans, cur.PositiveSpans) { + return false + } + if !floatBucketsMatch(exp.PositiveBuckets, cur.PositiveBuckets) { + return false + } + + return true +} + +func floatBucketsMatch(b1, b2 []float64) bool { + if len(b1) != len(b2) { + return false + } + for i, b := range b1 { + if !almost.Equal(b, b2[i], defaultEpsilon) { + return false + } + } + return true +} + +func spansMatch(s1, s2 []histogram.Span) bool { + if len(s1) == 0 && len(s2) == 0 { + return true + } + + s1idx, s2idx := 0, 0 + for { + if s1idx >= len(s1) { + return allEmptySpans(s2[s2idx:]) + } + if s2idx >= len(s2) { + return allEmptySpans(s1[s1idx:]) + } + + currS1, currS2 := s1[s1idx], s2[s2idx] + s1idx++ + s2idx++ + if currS1.Length == 0 { + // This span is zero length, so we add consecutive such spans + // until we find a non-zero span. + for ; s1idx < len(s1) && s1[s1idx].Length == 0; s1idx++ { + currS1.Offset += s1[s1idx].Offset + } + if s1idx < len(s1) { + currS1.Offset += s1[s1idx].Offset + currS1.Length = s1[s1idx].Length + s1idx++ + } + } + if currS2.Length == 0 { + // This span is zero length, so we add consecutive such spans + // until we find a non-zero span. + for ; s2idx < len(s2) && s2[s2idx].Length == 0; s2idx++ { + currS2.Offset += s2[s2idx].Offset + } + if s2idx < len(s2) { + currS2.Offset += s2[s2idx].Offset + currS2.Length = s2[s2idx].Length + s2idx++ + } + } + + if currS1.Length == 0 && currS2.Length == 0 { + // The last spans of both set are zero length. Previous spans match. + return true + } + + if currS1.Offset != currS2.Offset || currS1.Length != currS2.Length { + return false + } + } +} + +func allEmptySpans(s []histogram.Span) bool { + for _, ss := range s { + if ss.Length > 0 { + return false + } + } + return true +} + func (ev *evalCmd) checkExpectedFailure(actual error) error { if ev.expectedFailMessage != "" { if ev.expectedFailMessage != actual.Error() { diff --git a/util/almost/almost.go b/util/almost/almost.go index 1803c4b430..e0b5acb820 100644 --- a/util/almost/almost.go +++ b/util/almost/almost.go @@ -24,6 +24,8 @@ var minNormal = math.Float64frombits(0x0010000000000000) // The smallest positiv // Equal returns true if a and b differ by less than their sum // multiplied by epsilon. func Equal(a, b, epsilon float64) bool { + // StaleNaN is a special value that is used as staleness maker, so + // the two values are equal when both are exactly equals to stale NaN if value.IsStaleNaN(a) || value.IsStaleNaN(b) { return value.IsStaleNaN(a) && value.IsStaleNaN(b) } From 87eab0aaad8af2348415b2d3582aacbffb613b23 Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Fri, 23 Aug 2024 09:32:04 +0800 Subject: [PATCH 33/83] fix golang lint Signed-off-by: Ziqi Zhao --- promql/promqltest/test.go | 2 +- util/almost/almost.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 1773b63417..4d12dead9b 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -838,7 +838,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { } // compareNativeHistogram is helper function to compare two native histograms -// which can tolerate some differ in the field of float type, such as Count, Sum +// which can tolerate some differ in the field of float type, such as Count, Sum. func compareNativeHistogram(exp, cur *histogram.FloatHistogram) bool { if exp == nil || cur == nil { return false diff --git a/util/almost/almost.go b/util/almost/almost.go index e0b5acb820..a404626586 100644 --- a/util/almost/almost.go +++ b/util/almost/almost.go @@ -25,7 +25,7 @@ var minNormal = math.Float64frombits(0x0010000000000000) // The smallest positiv // multiplied by epsilon. func Equal(a, b, epsilon float64) bool { // StaleNaN is a special value that is used as staleness maker, so - // the two values are equal when both are exactly equals to stale NaN + // the two values are equal when both are exactly equals to stale NaN. if value.IsStaleNaN(a) || value.IsStaleNaN(b) { return value.IsStaleNaN(a) && value.IsStaleNaN(b) } From b0aba26ed5f2abf280bef3360672953ecb8d185b Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Fri, 23 Aug 2024 08:20:20 +0200 Subject: [PATCH 34/83] tsdb: Fix ValNone typo in comment Signed-off-by: Arve Knudsen --- tsdb/querier.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsdb/querier.go b/tsdb/querier.go index 2e15f0b084..912c950329 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -972,7 +972,7 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool { // Check if the encoding has changed (i.e. we need to create a new // chunk as chunks can't have multiple encoding types). // For the first sample, the following condition will always be true as - // ValNoneNone != ValFloat | ValHistogram | ValFloatHistogram. + // ValNone != ValFloat | ValHistogram | ValFloatHistogram. if currentValueType != prevValueType { if prevValueType != chunkenc.ValNone { p.chunksFromIterable = append(p.chunksFromIterable, chunks.Meta{Chunk: currentChunk, MinTime: cmint, MaxTime: cmaxt}) From 41c076196ef6626585cac34ec758e6582019c010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Thu, 22 Aug 2024 11:36:47 +0200 Subject: [PATCH 35/83] New cases in Test_ChunkQuerier_OOOQuery and Test_Querier_OOOQuery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Case 1: OOO in-memory head chunk overlaps with first mmaped in-order chunk. Query: |----------------------------------------------------------------| InO: |------mmap---------------||---------mem----------------------| OOO: |-----mem-----------| This triggers ChunkOrIterableWithCopy not including OOO head chunks bug. Similar to #14693 however testing the end of the interval doesn't trigger the problem because there the in-order head chunk will be trimmed with a tombstone, causing the code to switch to ChunkOrIterable which was fixed. See https://github.com/prometheus/prometheus/blob/a36d1a8a9261ba7169cf2fca862a606adc9f3d9d/tsdb/querier.go#L646 where len(p.bufIter.Intervals) will be non zero, because it includes the tombstone to trim the result to the query max time. Thus a new test is added to check the overlap at the beginning of the interval that has a separate chunk, which does not need trimming. Note: same test doesn't fail for sample querier in Test_Querier_OOOQuery as that doesn't use copy, that is copyHeadChunk is false in the if condition above. Case 2: OOO mmaped head chunk overlaps with first mmaped in-order chunk. Query: |----------------------------------------------------------------| InO: |------mmap---------------||---------mem----------------------| OOO: |-----mmap-----------| |--mem--| In this case the meta contains the reference of the in-order chunk and no indication that a merge is needed with the OOO mmaped chunk. Signed-off-by: György Krajcsovits --- tsdb/db_test.go | 377 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 304 insertions(+), 73 deletions(-) diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 6d266e8724..4e3a077f6a 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -5036,16 +5036,15 @@ func testOOOQueryAfterRestartWithSnapshotAndRemovedWBL(t *testing.T, scenario sa func Test_Querier_OOOQuery(t *testing.T) { opts := DefaultOptions() - opts.OutOfOrderCapMax = 30 opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds() series1 := labels.FromStrings("foo", "bar1") + type filterFunc func(t int64) bool + defaultFilterFunc := func(t int64) bool { return true } + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } - addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter func(int64) bool) ([]chunks.Sample, int) { - if filter == nil { - filter = func(int64) bool { return true } - } + addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc) ([]chunks.Sample, int) { app := db.Appender(context.Background()) totalAppended := 0 for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() { @@ -5060,68 +5059,173 @@ func Test_Querier_OOOQuery(t *testing.T) { totalAppended++ } require.NoError(t, app.Commit()) + require.Positive(t, totalAppended, 0) // Sanity check that filter is not too zealous. return expSamples, totalAppended } + type sampleBatch struct { + minT int64 + maxT int64 + filter filterFunc + isOOO bool + } + tests := []struct { - name string - queryMinT int64 - queryMaxT int64 - inOrderMinT int64 - inOrderMaxT int64 - oooMinT int64 - oooMaxT int64 + name string + oooCap int64 + queryMinT int64 + queryMaxT int64 + batches []sampleBatch }{ { - name: "query interval covering ooomint and inordermaxt returns all ingested samples", - queryMinT: minutes(0), - queryMaxT: minutes(200), - inOrderMinT: minutes(100), - inOrderMaxT: minutes(200), - oooMinT: minutes(0), - oooMaxT: minutes(99), + name: "query interval covering ooomint and inordermaxt returns all ingested samples", + oooCap: 30, + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: defaultFilterFunc, + }, + { + minT: minutes(0), + maxT: minutes(99), + filter: defaultFilterFunc, + isOOO: true, + }, + }, }, { - name: "partial query interval returns only samples within interval", - queryMinT: minutes(20), - queryMaxT: minutes(180), - inOrderMinT: minutes(100), - inOrderMaxT: minutes(200), - oooMinT: minutes(0), - oooMaxT: minutes(99), + name: "partial query interval returns only samples within interval", + oooCap: 30, + queryMinT: minutes(20), + queryMaxT: minutes(180), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: defaultFilterFunc, + }, + { + minT: minutes(0), + maxT: minutes(99), + filter: defaultFilterFunc, + isOOO: true, + }, + }, }, { - name: "query overlapping inorder and ooo samples returns all ingested samples", - queryMinT: minutes(0), - queryMaxT: minutes(200), - inOrderMinT: minutes(100), - inOrderMaxT: minutes(200), - oooMinT: minutes(180 - opts.OutOfOrderCapMax/2), // Make sure to fit into the OOO head. - oooMaxT: minutes(180), + name: "query overlapping inorder and ooo samples returns all ingested samples at the end of the interval", + oooCap: 30, + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: func(t int64) bool { return t%2 == 0 }, + isOOO: false, + }, + { + minT: minutes(170), + maxT: minutes(180), + filter: func(t int64) bool { return t%2 == 1 }, + isOOO: true, + }, + }, + }, + { + name: "query overlapping inorder and ooo in-memory samples returns all ingested samples at the beginning of the interval", + oooCap: 30, + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: func(t int64) bool { return t%2 == 0 }, + isOOO: false, + }, + { + minT: minutes(100), + maxT: minutes(110), + filter: func(t int64) bool { return t%2 == 1 }, + isOOO: true, + }, + }, + }, + { + name: "query inorder contain ooo mmaped samples returns all ingested samples at the beginning of the interval", + oooCap: 5, + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: func(t int64) bool { return t%2 == 0 }, + isOOO: false, + }, + { + minT: minutes(101), + maxT: minutes(101 + (5-1)*2), // Append samples to fit in a single mmmaped OOO chunk and fit inside the first in-order mmaped chunk. + filter: func(t int64) bool { return t%2 == 1 }, + isOOO: true, + }, + { + minT: minutes(191), + maxT: minutes(193), // Append some more OOO samples to trigger mapping the OOO chunk, but use time 151 to not overlap with in-order head chunk. + filter: func(t int64) bool { return t%2 == 1 }, + isOOO: true, + }, + }, + }, + { + name: "query overlapping inorder and ooo mmaped samples returns all ingested samples at the beginning of the interval", + oooCap: 30, + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: func(t int64) bool { return t%2 == 0 }, + isOOO: false, + }, + { + minT: minutes(101), + maxT: minutes(101 + (30-1)*2), // Append samples to fit in a single mmmaped OOO chunk and overlap the first in-order mmaped chunk. + filter: func(t int64) bool { return t%2 == 1 }, + isOOO: true, + }, + { + minT: minutes(191), + maxT: minutes(193), // Append some more OOO samples to trigger mapping the OOO chunk, but use time 151 to not overlap with in-order head chunk. + filter: func(t int64) bool { return t%2 == 1 }, + isOOO: true, + }, + }, }, } for _, tc := range tests { t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { + opts.OutOfOrderCapMax = tc.oooCap db := openTestDB(t, opts, nil) db.DisableCompactions() defer func() { require.NoError(t, db.Close()) }() - var ( - expSamples []chunks.Sample - inoSamples int - ) + var expSamples []chunks.Sample + var oooSamples, appendedCount int - // Add in-order samples (at even minutes). - expSamples, inoSamples = addSample(db, tc.inOrderMinT, tc.inOrderMaxT, tc.queryMinT, tc.queryMaxT, expSamples, func(t int64) bool { return t%2 == 0 }) - // Sanity check that filter is not too zealous. - require.Positive(t, inoSamples, 0) - - // Add out-of-order samples (at odd minutes). - expSamples, oooSamples := addSample(db, tc.oooMinT, tc.oooMaxT, tc.queryMinT, tc.queryMaxT, expSamples, func(t int64) bool { return t%2 == 1 }) - // Sanity check that filter is not too zealous. - require.Positive(t, oooSamples, 0) + for _, batch := range tc.batches { + expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter) + if batch.isOOO { + oooSamples += appendedCount + } + } sort.Slice(expSamples, func(i, j int) bool { return expSamples[i].T() < expSamples[j].T() @@ -5147,11 +5251,17 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { series1 := labels.FromStrings("foo", "bar1") + type filterFunc func(t int64) bool + defaultFilterFunc := func(t int64) bool { return true } + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } - addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample) ([]chunks.Sample, int) { + addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc) ([]chunks.Sample, int) { app := db.Appender(context.Background()) totalAppended := 0 for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() { + if !filter(m / time.Minute.Milliseconds()) { + continue + } _, err := app.Append(0, series1, m, float64(m)) if m >= queryMinT && m <= queryMaxT { expSamples = append(expSamples, sample{t: m, f: float64(m)}) @@ -5160,39 +5270,158 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { totalAppended++ } require.NoError(t, app.Commit()) + require.Positive(t, totalAppended) // Sanity check that filter is not too zealous. return expSamples, totalAppended } + type sampleBatch struct { + minT int64 + maxT int64 + filter filterFunc + isOOO bool + } + tests := []struct { - name string - queryMinT int64 - queryMaxT int64 - inOrderMinT int64 - inOrderMaxT int64 - oooMinT int64 - oooMaxT int64 + name string + oooCap int64 + queryMinT int64 + queryMaxT int64 + batches []sampleBatch }{ { - name: "query interval covering ooomint and inordermaxt returns all ingested samples", - queryMinT: minutes(0), - queryMaxT: minutes(200), - inOrderMinT: minutes(100), - inOrderMaxT: minutes(200), - oooMinT: minutes(0), - oooMaxT: minutes(99), + name: "query interval covering ooomint and inordermaxt returns all ingested samples", + oooCap: 30, + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: defaultFilterFunc, + }, + { + minT: minutes(0), + maxT: minutes(99), + filter: defaultFilterFunc, + isOOO: true, + }, + }, }, { - name: "partial query interval returns only samples within interval", - queryMinT: minutes(20), - queryMaxT: minutes(180), - inOrderMinT: minutes(100), - inOrderMaxT: minutes(200), - oooMinT: minutes(0), - oooMaxT: minutes(99), + name: "partial query interval returns only samples within interval", + oooCap: 30, + queryMinT: minutes(20), + queryMaxT: minutes(180), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: defaultFilterFunc, + }, + { + minT: minutes(0), + maxT: minutes(99), + filter: defaultFilterFunc, + isOOO: true, + }, + }, + }, + { + name: "query overlapping inorder and ooo samples returns all ingested samples at the end of the interval", + oooCap: 30, + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: func(t int64) bool { return t%2 == 0 }, + isOOO: false, + }, + { + minT: minutes(170), + maxT: minutes(180), + filter: func(t int64) bool { return t%2 == 1 }, + isOOO: true, + }, + }, + }, + { + name: "query overlapping inorder and ooo in-memory samples returns all ingested samples at the beginning of the interval", + oooCap: 30, + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: func(t int64) bool { return t%2 == 0 }, + isOOO: false, + }, + { + minT: minutes(100), + maxT: minutes(110), + filter: func(t int64) bool { return t%2 == 1 }, + isOOO: true, + }, + }, + }, + { + name: "query inorder contain ooo mmaped samples returns all ingested samples at the beginning of the interval", + oooCap: 5, + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: func(t int64) bool { return t%2 == 0 }, + isOOO: false, + }, + { + minT: minutes(101), + maxT: minutes(101 + (5-1)*2), // Append samples to fit in a single mmmaped OOO chunk and fit inside the first in-order mmaped chunk. + filter: func(t int64) bool { return t%2 == 1 }, + isOOO: true, + }, + { + minT: minutes(191), + maxT: minutes(193), // Append some more OOO samples to trigger mapping the OOO chunk, but use time 151 to not overlap with in-order head chunk. + filter: func(t int64) bool { return t%2 == 1 }, + isOOO: true, + }, + }, + }, + { + name: "query overlapping inorder and ooo mmaped samples returns all ingested samples at the beginning of the interval", + oooCap: 30, + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: func(t int64) bool { return t%2 == 0 }, + isOOO: false, + }, + { + minT: minutes(101), + maxT: minutes(101 + (30-1)*2), // Append samples to fit in a single mmmaped OOO chunk and overlap the first in-order mmaped chunk. + filter: func(t int64) bool { return t%2 == 1 }, + isOOO: true, + }, + { + minT: minutes(191), + maxT: minutes(193), // Append some more OOO samples to trigger mapping the OOO chunk, but use time 151 to not overlap with in-order head chunk. + filter: func(t int64) bool { return t%2 == 1 }, + isOOO: true, + }, + }, }, } for _, tc := range tests { t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { + opts.OutOfOrderCapMax = tc.oooCap db := openTestDB(t, opts, nil) db.DisableCompactions() defer func() { @@ -5200,12 +5429,14 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { }() var expSamples []chunks.Sample + var oooSamples, appendedCount int - // Add in-order samples. - expSamples, _ = addSample(db, tc.inOrderMinT, tc.inOrderMaxT, tc.queryMinT, tc.queryMaxT, expSamples) - - // Add out-of-order samples. - expSamples, oooSamples := addSample(db, tc.oooMinT, tc.oooMaxT, tc.queryMinT, tc.queryMaxT, expSamples) + for _, batch := range tc.batches { + expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter) + if batch.isOOO { + oooSamples += appendedCount + } + } sort.Slice(expSamples, func(i, j int) bool { return expSamples[i].T() < expSamples[j].T() From 183bbc39a23123789baf77eb9f915103bb83fc83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Thu, 22 Aug 2024 11:59:53 +0200 Subject: [PATCH 36/83] Make requesting merge with OOO head explicit in chunk.Meta MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- tsdb/chunks/chunks.go | 3 +++ tsdb/ooo_head_read.go | 17 +++++++++-------- tsdb/ooo_head_read_test.go | 9 +++++---- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tsdb/chunks/chunks.go b/tsdb/chunks/chunks.go index ec0f6d4036..69201c6db7 100644 --- a/tsdb/chunks/chunks.go +++ b/tsdb/chunks/chunks.go @@ -133,6 +133,9 @@ type Meta struct { // Time range the data covers. // When MaxTime == math.MaxInt64 the chunk is still open and being appended to. MinTime, MaxTime int64 + + // Flag to indicate that this meta needs merge with OOO data. + MergeOOO bool } // ChunkFromSamples requires all samples to have the same type. diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go index f1d06a421b..a50decd623 100644 --- a/tsdb/ooo_head_read.go +++ b/tsdb/ooo_head_read.go @@ -91,10 +91,11 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap addChunk := func(minT, maxT int64, ref chunks.ChunkRef, chunk chunkenc.Chunk) { tmpChks = append(tmpChks, chunks.Meta{ - MinTime: minT, - MaxTime: maxT, - Ref: ref, - Chunk: chunk, + MinTime: minT, + MaxTime: maxT, + Ref: ref, + Chunk: chunk, + MergeOOO: true, }) } @@ -160,6 +161,7 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap if c.Chunk != nil { (*chks)[len(*chks)-1].Chunk = c.Chunk } + (*chks)[len(*chks)-1].MergeOOO = (*chks)[len(*chks)-1].MergeOOO || c.MergeOOO } } @@ -241,8 +243,8 @@ func NewHeadAndOOOChunkReader(head *Head, mint, maxt int64, cr *headChunkReader, } func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error) { - sid, _, isOOO := unpackHeadChunkRef(meta.Ref) - if !isOOO && meta.Chunk == nil { // meta.Chunk can have a copy of OOO head samples, even on non-OOO chunk ID. + sid, _, _ := unpackHeadChunkRef(meta.Ref) + if !meta.MergeOOO { return cr.cr.ChunkOrIterable(meta) } @@ -266,8 +268,7 @@ func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chu // ChunkOrIterableWithCopy implements ChunkReaderWithCopy. The special Copy // behaviour is only implemented for the in-order head chunk. func (cr *HeadAndOOOChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) { - _, _, isOOO := unpackHeadChunkRef(meta.Ref) - if !isOOO { + if !meta.MergeOOO { return cr.cr.ChunkOrIterableWithCopy(meta) } chk, iter, err := cr.ChunkOrIterable(meta) diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go index f71d497320..e565933f83 100644 --- a/tsdb/ooo_head_read_test.go +++ b/tsdb/ooo_head_read_test.go @@ -308,9 +308,10 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { var expChunks []chunks.Meta for _, e := range tc.expChunks { meta := chunks.Meta{ - Chunk: chunkenc.Chunk(nil), - MinTime: e.mint, - MaxTime: e.maxt, + Chunk: chunkenc.Chunk(nil), + MinTime: e.mint, + MaxTime: e.maxt, + MergeOOO: true, // Only OOO chunks are tested here, so we always request merge from OOO head. } // Ref to whatever Ref the chunk has, that we refer to by ID @@ -484,7 +485,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { cr := NewHeadAndOOOChunkReader(db.head, 0, 1000, nil, nil, 0) defer cr.Close() c, iterable, err := cr.ChunkOrIterable(chunks.Meta{ - Ref: 0x1800000, Chunk: chunkenc.Chunk(nil), MinTime: 100, MaxTime: 300, + Ref: 0x1800000, Chunk: chunkenc.Chunk(nil), MinTime: 100, MaxTime: 300, MergeOOO: true, }) require.Nil(t, iterable) require.Equal(t, err, fmt.Errorf("not found")) From 436a439ed2aa8a1dfd36555b4cbaf22019619314 Mon Sep 17 00:00:00 2001 From: Owen Williams Date: Fri, 23 Aug 2024 14:13:41 -0400 Subject: [PATCH 37/83] fix(utf8): fix config logic for name validation We should only overwrite the ScrapeConfig if it is empty. Added tests part of https://github.com/prometheus/prometheus/issues/13095 Signed-off-by: Owen Williams --- config/config.go | 4 +- config/config_test.go | 50 +++++++++++++++++++ .../scrape_config_default_validation_mode.yml | 2 + .../scrape_config_global_validation_mode.yml | 4 ++ ...pe_config_local_global_validation_mode.yml | 5 ++ .../scrape_config_local_validation_mode.yml | 3 ++ 6 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 config/testdata/scrape_config_default_validation_mode.yml create mode 100644 config/testdata/scrape_config_global_validation_mode.yml create mode 100644 config/testdata/scrape_config_local_global_validation_mode.yml create mode 100644 config/testdata/scrape_config_local_validation_mode.yml diff --git a/config/config.go b/config/config.go index 4326b0a992..d1e463d86c 100644 --- a/config/config.go +++ b/config/config.go @@ -781,7 +781,9 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error { default: return fmt.Errorf("unknown name validation method specified, must be either 'legacy' or 'utf8', got %s", globalConfig.MetricNameValidationScheme) } - c.MetricNameValidationScheme = globalConfig.MetricNameValidationScheme + if c.MetricNameValidationScheme == "" { + c.MetricNameValidationScheme = globalConfig.MetricNameValidationScheme + } return nil } diff --git a/config/config_test.go b/config/config_test.go index 9b074bef1c..774aba1e28 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -16,6 +16,7 @@ package config import ( "crypto/tls" "encoding/json" + "fmt" "net/url" "os" "path/filepath" @@ -2300,3 +2301,52 @@ func TestScrapeConfigDisableCompression(t *testing.T) { require.False(t, got.ScrapeConfigs[0].EnableCompression) } + +func TestScrapeConfigNameValidationSettings(t *testing.T) { + model.NameValidationScheme = model.UTF8Validation + defer func() { + model.NameValidationScheme = model.LegacyValidation + }() + + tests := []struct { + name string + inputFile string + expectScheme string + }{ + { + name: "blank config implies default", + inputFile: "scrape_config_default_validation_mode", + expectScheme: "", + }, + { + name: "global setting implies local settings", + inputFile: "scrape_config_global_validation_mode", + expectScheme: "utf8", + }, + { + name: "local setting", + inputFile: "scrape_config_local_validation_mode", + expectScheme: "utf8", + }, + { + name: "local setting overrides global setting", + inputFile: "scrape_config_local_global_validation_mode", + expectScheme: "legacy", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, false, log.NewNopLogger()) + require.NoError(t, err) + + out, err := yaml.Marshal(want) + + require.NoError(t, err) + got := &Config{} + require.NoError(t, yaml.UnmarshalStrict(out, got)) + + require.Equal(t, tc.expectScheme, got.ScrapeConfigs[0].MetricNameValidationScheme) + }) + } +} diff --git a/config/testdata/scrape_config_default_validation_mode.yml b/config/testdata/scrape_config_default_validation_mode.yml new file mode 100644 index 0000000000..96680d6438 --- /dev/null +++ b/config/testdata/scrape_config_default_validation_mode.yml @@ -0,0 +1,2 @@ +scrape_configs: + - job_name: prometheus diff --git a/config/testdata/scrape_config_global_validation_mode.yml b/config/testdata/scrape_config_global_validation_mode.yml new file mode 100644 index 0000000000..1548554397 --- /dev/null +++ b/config/testdata/scrape_config_global_validation_mode.yml @@ -0,0 +1,4 @@ +global: + metric_name_validation_scheme: utf8 +scrape_configs: + - job_name: prometheus diff --git a/config/testdata/scrape_config_local_global_validation_mode.yml b/config/testdata/scrape_config_local_global_validation_mode.yml new file mode 100644 index 0000000000..d13605e21d --- /dev/null +++ b/config/testdata/scrape_config_local_global_validation_mode.yml @@ -0,0 +1,5 @@ +global: + metric_name_validation_scheme: utf8 +scrape_configs: + - job_name: prometheus + metric_name_validation_scheme: legacy diff --git a/config/testdata/scrape_config_local_validation_mode.yml b/config/testdata/scrape_config_local_validation_mode.yml new file mode 100644 index 0000000000..fad4235806 --- /dev/null +++ b/config/testdata/scrape_config_local_validation_mode.yml @@ -0,0 +1,3 @@ +scrape_configs: + - job_name: prometheus + metric_name_validation_scheme: utf8 From d4994e5bc44490441a6a6ac05331cc6fbabae0f5 Mon Sep 17 00:00:00 2001 From: Devin Trejo Date: Fri, 23 Aug 2024 18:15:27 -0400 Subject: [PATCH 38/83] fix: Remote-write-reciever returns 4xx when request contains a time series with duplicate labels. (#14716) Signed-off-by: Devin Trejo --- storage/remote/write_handler.go | 13 +++++++++++-- storage/remote/write_handler_test.go | 23 +++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index 8ac7590465..9ea3f2bf93 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -236,11 +236,16 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err b := labels.NewScratchBuilder(0) for _, ts := range req.Timeseries { ls := ts.ToLabels(&b, nil) + + // TODO(bwplotka): Even as per 1.0 spec, this should be a 400 error, while other samples are + // potentially written. Perhaps unify with fixed writeV2 implementation a bit. if !ls.Has(labels.MetricName) || !ls.IsValid() { level.Warn(h.logger).Log("msg", "Invalid metric names or labels", "got", ls.String()) samplesWithInvalidLabels++ - // TODO(bwplotka): Even as per 1.0 spec, this should be a 400 error, while other samples are - // potentially written. Perhaps unify with fixed writeV2 implementation a bit. + continue + } else if duplicateLabel, hasDuplicate := ls.HasDuplicateLabelNames(); hasDuplicate { + level.Warn(h.logger).Log("msg", "Invalid labels for series.", "labels", ls.String(), "duplicated_label", duplicateLabel) + samplesWithInvalidLabels++ continue } @@ -379,6 +384,10 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * badRequestErrs = append(badRequestErrs, fmt.Errorf("invalid metric name or labels, got %v", ls.String())) samplesWithInvalidLabels += len(ts.Samples) + len(ts.Histograms) continue + } else if duplicateLabel, hasDuplicate := ls.HasDuplicateLabelNames(); hasDuplicate { + badRequestErrs = append(badRequestErrs, fmt.Errorf("invalid labels for series, labels %v, duplicated label %s", ls.String(), duplicateLabel)) + samplesWithInvalidLabels += len(ts.Samples) + len(ts.Histograms) + continue } allSamplesSoFar := rs.AllSamples() diff --git a/storage/remote/write_handler_test.go b/storage/remote/write_handler_test.go index 6e1336a7aa..5c89a1ab95 100644 --- a/storage/remote/write_handler_test.go +++ b/storage/remote/write_handler_test.go @@ -338,6 +338,15 @@ func TestRemoteWriteHandler_V2Message(t *testing.T) { expectedCode: http.StatusBadRequest, expectedRespBody: "invalid metric name or labels, got {__name__=\"\"}\n", }, + { + desc: "Partial write; first series with duplicate labels", + input: append( + // Series with __name__="test_metric1",test_metric1="test_metric1",test_metric1="test_metric1" labels. + []writev2.TimeSeries{{LabelsRefs: []uint32{1, 2, 2, 2, 2, 2}, Samples: []writev2.Sample{{Value: 1, Timestamp: 1}}}}, + writeV2RequestFixture.Timeseries...), + expectedCode: http.StatusBadRequest, + expectedRespBody: "invalid labels for series, labels {__name__=\"test_metric1\", test_metric1=\"test_metric1\", test_metric1=\"test_metric1\"}, duplicated label test_metric1\n", + }, { desc: "Partial write; first series with one OOO sample", input: func() []writev2.TimeSeries { @@ -836,6 +845,13 @@ func (m *mockAppendable) Append(_ storage.SeriesRef, l labels.Labels, t int64, v return 0, storage.ErrDuplicateSampleForTimestamp } + if l.IsEmpty() { + return 0, tsdb.ErrInvalidSample + } + if _, hasDuplicates := l.HasDuplicateLabelNames(); hasDuplicates { + return 0, tsdb.ErrInvalidSample + } + m.latestSample[l.Hash()] = t m.samples = append(m.samples, mockSample{l, t, v}) return 0, nil @@ -887,6 +903,13 @@ func (m *mockAppendable) AppendHistogram(_ storage.SeriesRef, l labels.Labels, t return 0, storage.ErrDuplicateSampleForTimestamp } + if l.IsEmpty() { + return 0, tsdb.ErrInvalidSample + } + if _, hasDuplicates := l.HasDuplicateLabelNames(); hasDuplicates { + return 0, tsdb.ErrInvalidSample + } + m.latestHistogram[l.Hash()] = t m.histograms = append(m.histograms, mockHistogram{l, t, h, fh}) return 0, nil From ef649d59688f71fbd624e849c8e20c6f4b8c98c4 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Mon, 26 Aug 2024 08:48:37 +0200 Subject: [PATCH 39/83] Revert " Store `mmMaxTime` in same field as `seriesShard`" Signed-off-by: Marco Pracucci --- tsdb/head.go | 46 +++++++++++----------------------------------- tsdb/head_read.go | 2 +- tsdb/head_test.go | 38 -------------------------------------- tsdb/head_wal.go | 13 +++++-------- 4 files changed, 17 insertions(+), 82 deletions(-) diff --git a/tsdb/head.go b/tsdb/head.go index 9d81b24ae4..b7bfaa0fda 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -178,7 +178,6 @@ type HeadOptions struct { WALReplayConcurrency int // EnableSharding enables ShardedPostings() support in the Head. - // EnableSharding is temporarily disabled during Init(). EnableSharding bool } @@ -610,7 +609,7 @@ const cardinalityCacheExpirationTime = time.Duration(30) * time.Second // Init loads data from the write ahead log and prepares the head for writes. // It should be called before using an appender so that it // limits the ingested samples to the head min valid time. -func (h *Head) Init(minValidTime int64) (err error) { +func (h *Head) Init(minValidTime int64) error { h.minValidTime.Store(minValidTime) defer func() { h.postings.EnsureOrder(h.opts.WALReplayConcurrency) @@ -624,24 +623,6 @@ func (h *Head) Init(minValidTime int64) (err error) { } }() - // If sharding is enabled, disable it while initializing, and calculate the shards later. - // We're going to use that field for other purposes during WAL replay, - // so we don't want to waste time on calculating the shard that we're going to lose anyway. - if h.opts.EnableSharding { - h.opts.EnableSharding = false - defer func() { - h.opts.EnableSharding = true - if err == nil { - // No locking is needed here as nobody should be writing while we're in Init. - for _, stripe := range h.series.series { - for _, s := range stripe { - s.shardHashOrMemoryMappedMaxTime = labels.StableHash(s.lset) - } - } - } - }() - } - level.Info(h.logger).Log("msg", "Replaying on-disk memory mappable chunks if any") start := time.Now() @@ -702,6 +683,7 @@ func (h *Head) Init(minValidTime int64) (err error) { mmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk oooMmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk lastMmapRef chunks.ChunkDiskMapperRef + err error mmapChunkReplayDuration time.Duration ) @@ -2086,11 +2068,9 @@ type memSeries struct { ref chunks.HeadSeriesRef meta *metadata.Metadata - // Series labels hash to use for sharding purposes. - // The value is always 0 when sharding has not been explicitly enabled in TSDB. - // While the WAL replay the value stored here is the max time of any mmapped chunk, - // and the shard hash is re-calculated after WAL replay is complete. - shardHashOrMemoryMappedMaxTime uint64 + // Series labels hash to use for sharding purposes. The value is always 0 when sharding has not + // been explicitly enabled in TSDB. + shardHash uint64 // Everything after here should only be accessed with the lock held. sync.Mutex @@ -2115,6 +2095,8 @@ type memSeries struct { ooo *memSeriesOOOFields + mmMaxTime int64 // Max time of any mmapped chunk, only used during WAL replay. + nextAt int64 // Timestamp at which to cut the next chunk. histogramChunkHasComputedEndTime bool // True if nextAt has been predicted for the current histograms chunk; false otherwise. pendingCommit bool // Whether there are samples waiting to be committed to this series. @@ -2145,10 +2127,10 @@ type memSeriesOOOFields struct { func newMemSeries(lset labels.Labels, id chunks.HeadSeriesRef, shardHash uint64, isolationDisabled bool) *memSeries { s := &memSeries{ - lset: lset, - ref: id, - nextAt: math.MinInt64, - shardHashOrMemoryMappedMaxTime: shardHash, + lset: lset, + ref: id, + nextAt: math.MinInt64, + shardHash: shardHash, } if !isolationDisabled { s.txs = newTxRing(0) @@ -2236,12 +2218,6 @@ func (s *memSeries) truncateChunksBefore(mint int64, minOOOMmapRef chunks.ChunkD return removedInOrder + removedOOO } -// shardHash returns the shard hash of the series, only available after WAL replay. -func (s *memSeries) shardHash() uint64 { return s.shardHashOrMemoryMappedMaxTime } - -// mmMaxTime returns the max time of any mmapped chunk in the series, only available during WAL replay. -func (s *memSeries) mmMaxTime() int64 { return int64(s.shardHashOrMemoryMappedMaxTime) } - // cleanupAppendIDsBelow cleans up older appendIDs. Has to be called after // acquiring lock. func (s *memSeries) cleanupAppendIDsBelow(bound uint64) { diff --git a/tsdb/head_read.go b/tsdb/head_read.go index 5e3a76273a..26cda3089f 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -170,7 +170,7 @@ func (h *headIndexReader) ShardedPostings(p index.Postings, shardIndex, shardCou } // Check if the series belong to the shard. - if s.shardHash()%shardCount != shardIndex { + if s.shardHash%shardCount != shardIndex { continue } diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 0ce60b8494..a7a8847e80 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -23,7 +23,6 @@ import ( "path" "path/filepath" "reflect" - "runtime/pprof" "sort" "strconv" "strings" @@ -90,43 +89,6 @@ func newTestHeadWithOptions(t testing.TB, compressWAL wlog.CompressionType, opts return h, wal } -// BenchmarkLoadRealWLs will be skipped unless the BENCHMARK_LOAD_REAL_WLS_DIR environment variable is set. -// BENCHMARK_LOAD_REAL_WLS_DIR should be the folder where `wal` and `chunks_head` are located. -// Optionally, BENCHMARK_LOAD_REAL_WLS_PROFILE can be set to a file path to write a CPU profile. -func BenchmarkLoadRealWLs(b *testing.B) { - dir := os.Getenv("BENCHMARK_LOAD_REAL_WLS_DIR") - if dir == "" { - b.Skipped() - } - - profileFile := os.Getenv("BENCHMARK_LOAD_REAL_WLS_PROFILE") - if profileFile != "" { - b.Logf("Will profile in %s", profileFile) - f, err := os.Create(profileFile) - require.NoError(b, err) - b.Cleanup(func() { f.Close() }) - require.NoError(b, pprof.StartCPUProfile(f)) - b.Cleanup(pprof.StopCPUProfile) - } - - wal, err := wlog.New(nil, nil, filepath.Join(dir, "wal"), wlog.CompressionNone) - require.NoError(b, err) - b.Cleanup(func() { wal.Close() }) - - wbl, err := wlog.New(nil, nil, filepath.Join(dir, "wbl"), wlog.CompressionNone) - require.NoError(b, err) - b.Cleanup(func() { wbl.Close() }) - - // Load the WAL. - for i := 0; i < b.N; i++ { - opts := DefaultHeadOptions() - opts.ChunkDirRoot = dir - h, err := NewHead(nil, nil, wal, wbl, opts, nil) - require.NoError(b, err) - h.Init(0) - } -} - func BenchmarkCreateSeries(b *testing.B) { series := genSeries(b.N, 10, 0, 0) h, _ := newTestHead(b, 10000, wlog.CompressionNone, false) diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index 7397bbf413..ef96b53305 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -435,8 +435,6 @@ Outer: return nil } -func minInt64() int64 { return math.MinInt64 } - // resetSeriesWithMMappedChunks is only used during the WAL replay. func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc, oooMmc []*mmappedChunk, walSeriesRef chunks.HeadSeriesRef) (overlapped bool) { if mSeries.ref != walSeriesRef { @@ -483,11 +481,10 @@ func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc, oooMmc []*m } // Cache the last mmapped chunk time, so we can skip calling append() for samples it will reject. if len(mmc) == 0 { - mSeries.shardHashOrMemoryMappedMaxTime = uint64(minInt64()) + mSeries.mmMaxTime = math.MinInt64 } else { - mmMaxTime := mmc[len(mmc)-1].maxTime - mSeries.shardHashOrMemoryMappedMaxTime = uint64(mmMaxTime) - h.updateMinMaxTime(mmc[0].minTime, mmMaxTime) + mSeries.mmMaxTime = mmc[len(mmc)-1].maxTime + h.updateMinMaxTime(mmc[0].minTime, mSeries.mmMaxTime) } if len(oooMmc) != 0 { // Mint and maxt can be in any chunk, they are not sorted. @@ -588,7 +585,7 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp unknownRefs++ continue } - if s.T <= ms.mmMaxTime() { + if s.T <= ms.mmMaxTime { continue } if _, chunkCreated := ms.append(s.T, s.V, 0, appendChunkOpts); chunkCreated { @@ -617,7 +614,7 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp unknownHistogramRefs++ continue } - if s.t <= ms.mmMaxTime() { + if s.t <= ms.mmMaxTime { continue } var chunkCreated bool From da28f88910660f852aa37017dcc240afc83359c0 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 27 Aug 2024 10:14:16 +0100 Subject: [PATCH 40/83] Cut release 2.54.1 Signed-off-by: Bryan Boreham --- CHANGELOG.md | 8 ++++++++ VERSION | 2 +- web/ui/module/codemirror-promql/package.json | 4 ++-- web/ui/module/lezer-promql/package.json | 2 +- web/ui/package-lock.json | 14 +++++++------- web/ui/package.json | 2 +- web/ui/react-app/package.json | 4 ++-- 7 files changed, 22 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8857495048..c07e5b5786 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 2.54.1 / 2024-08-27 + +* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps. #14685 +* [BUGFIX] Docker SD: fix crash in `match_first_network` mode when container is reconnected to a new network. #14654 +* [BUGFIX] PromQL: fix experimental native histograms getting corrupted due to vector selector bug in range queries. #14538 +* [BUGFIX] PromQL: fix experimental native histogram counter reset detection on stale samples. #14514 +* [BUGFIX] PromQL: fix native histograms getting corrupted due to vector selector bug in range queries. #14605 + ## 2.54.0 / 2024-08-09 Release 2.54 brings a release candidate of a major new version of [Remote Write: 2.0](https://prometheus.io/docs/specs/remote_write_spec_2_0/). diff --git a/VERSION b/VERSION index 99aed793ad..3a40665f50 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.54.0 +2.54.1 diff --git a/web/ui/module/codemirror-promql/package.json b/web/ui/module/codemirror-promql/package.json index 6307f12f31..1c459b0906 100644 --- a/web/ui/module/codemirror-promql/package.json +++ b/web/ui/module/codemirror-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/codemirror-promql", - "version": "0.54.0", + "version": "0.54.1", "description": "a CodeMirror mode for the PromQL language", "types": "dist/esm/index.d.ts", "module": "dist/esm/index.js", @@ -29,7 +29,7 @@ }, "homepage": "https://github.com/prometheus/prometheus/blob/main/web/ui/module/codemirror-promql/README.md", "dependencies": { - "@prometheus-io/lezer-promql": "0.54.0", + "@prometheus-io/lezer-promql": "0.54.1", "lru-cache": "^7.18.3" }, "devDependencies": { diff --git a/web/ui/module/lezer-promql/package.json b/web/ui/module/lezer-promql/package.json index 14d263241e..8f9c0b1c85 100644 --- a/web/ui/module/lezer-promql/package.json +++ b/web/ui/module/lezer-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/lezer-promql", - "version": "0.54.0", + "version": "0.54.1", "description": "lezer-based PromQL grammar", "main": "dist/index.cjs", "type": "module", diff --git a/web/ui/package-lock.json b/web/ui/package-lock.json index 0f5db9f6da..e305ee9644 100644 --- a/web/ui/package-lock.json +++ b/web/ui/package-lock.json @@ -1,12 +1,12 @@ { "name": "prometheus-io", - "version": "0.54.0", + "version": "0.54.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "prometheus-io", - "version": "0.54.0", + "version": "0.54.1", "workspaces": [ "react-app", "module/*" @@ -30,10 +30,10 @@ }, "module/codemirror-promql": { "name": "@prometheus-io/codemirror-promql", - "version": "0.54.0", + "version": "0.54.1", "license": "Apache-2.0", "dependencies": { - "@prometheus-io/lezer-promql": "0.54.0", + "@prometheus-io/lezer-promql": "0.54.1", "lru-cache": "^7.18.3" }, "devDependencies": { @@ -69,7 +69,7 @@ }, "module/lezer-promql": { "name": "@prometheus-io/lezer-promql", - "version": "0.54.0", + "version": "0.54.1", "license": "Apache-2.0", "devDependencies": { "@lezer/generator": "^1.7.0", @@ -19332,7 +19332,7 @@ }, "react-app": { "name": "@prometheus-io/app", - "version": "0.54.0", + "version": "0.54.1", "dependencies": { "@codemirror/autocomplete": "^6.17.0", "@codemirror/commands": "^6.6.0", @@ -19350,7 +19350,7 @@ "@lezer/lr": "^1.4.1", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.54.0", + "@prometheus-io/codemirror-promql": "0.54.1", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6", diff --git a/web/ui/package.json b/web/ui/package.json index 01dd71f34a..f97d7098b2 100644 --- a/web/ui/package.json +++ b/web/ui/package.json @@ -28,5 +28,5 @@ "ts-jest": "^29.2.2", "typescript": "^4.9.5" }, - "version": "0.54.0" + "version": "0.54.1" } diff --git a/web/ui/react-app/package.json b/web/ui/react-app/package.json index 01620531ae..ace3ef264e 100644 --- a/web/ui/react-app/package.json +++ b/web/ui/react-app/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/app", - "version": "0.54.0", + "version": "0.54.1", "private": true, "dependencies": { "@codemirror/autocomplete": "^6.17.0", @@ -19,7 +19,7 @@ "@lezer/lr": "^1.4.1", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.54.0", + "@prometheus-io/codemirror-promql": "0.54.1", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6", From 7757794bb3f5c1335b7634f0e82bb588149486b2 Mon Sep 17 00:00:00 2001 From: Suraj Patil <31805557+patilsuraj767@users.noreply.github.com> Date: Wed, 28 Aug 2024 07:42:24 +0530 Subject: [PATCH 41/83] [ENHANCEMENT] Promtool: Adding labels to time series while creating tsdb blocks (#14403) * feat: #14402 - Adding labels to time series while creating tsdb blocks Signed-off-by: Suraj Patil --- cmd/promtool/backfill.go | 16 +++++++++--- cmd/promtool/backfill_test.go | 46 ++++++++++++++++++++++++++++++++++- cmd/promtool/main.go | 3 ++- cmd/promtool/tsdb.go | 4 +-- cmd/promtool/tsdb_test.go | 2 +- docs/command-line/promtool.md | 9 +++++++ 6 files changed, 71 insertions(+), 9 deletions(-) diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go index 400cae421a..16491f0416 100644 --- a/cmd/promtool/backfill.go +++ b/cmd/promtool/backfill.go @@ -85,7 +85,7 @@ func getCompatibleBlockDuration(maxBlockDuration int64) int64 { return blockDuration } -func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool) (returnErr error) { +func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool, customLabels map[string]string) (returnErr error) { blockDuration := getCompatibleBlockDuration(maxBlockDuration) mint = blockDuration * (mint / blockDuration) @@ -102,6 +102,8 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn nextSampleTs int64 = math.MaxInt64 ) + lb := labels.NewBuilder(labels.EmptyLabels()) + for t := mint; t <= maxt; t += blockDuration { tsUpper := t + blockDuration if nextSampleTs != math.MaxInt64 && nextSampleTs >= tsUpper { @@ -162,7 +164,13 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn l := labels.Labels{} p.Metric(&l) - if _, err := app.Append(0, l, *ts, v); err != nil { + lb.Reset(l) + for name, value := range customLabels { + lb.Set(name, value) + } + lbls := lb.Labels() + + if _, err := app.Append(0, lbls, *ts, v); err != nil { return fmt.Errorf("add sample: %w", err) } @@ -221,13 +229,13 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn return nil } -func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) (err error) { +func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration, customLabels map[string]string) (err error) { p := textparse.NewOpenMetricsParser(input, nil) // Don't need a SymbolTable to get max and min timestamps. maxt, mint, err := getMinAndMaxTimestamps(p) if err != nil { return fmt.Errorf("getting min and max timestamp: %w", err) } - if err = createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet); err != nil { + if err = createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet, customLabels); err != nil { return fmt.Errorf("block creation: %w", err) } return nil diff --git a/cmd/promtool/backfill_test.go b/cmd/promtool/backfill_test.go index 32abfa46a8..b818194e86 100644 --- a/cmd/promtool/backfill_test.go +++ b/cmd/promtool/backfill_test.go @@ -92,6 +92,7 @@ func TestBackfill(t *testing.T) { Description string MaxSamplesInAppender int MaxBlockDuration time.Duration + Labels map[string]string Expected struct { MinTime int64 MaxTime int64 @@ -636,6 +637,49 @@ http_requests_total{code="400"} 1024 7199 }, }, }, + { + ToParse: `# HELP http_requests_total The total number of HTTP requests. +# TYPE http_requests_total counter +http_requests_total{code="200"} 1 1624463088.000 +http_requests_total{code="200"} 2 1629503088.000 +http_requests_total{code="200"} 3 1629863088.000 +# EOF +`, + IsOk: true, + Description: "Sample with external labels.", + MaxSamplesInAppender: 5000, + MaxBlockDuration: 2048 * time.Hour, + Labels: map[string]string{"cluster_id": "123", "org_id": "999"}, + Expected: struct { + MinTime int64 + MaxTime int64 + NumBlocks int + BlockDuration int64 + Samples []backfillSample + }{ + MinTime: 1624463088000, + MaxTime: 1629863088000, + NumBlocks: 2, + BlockDuration: int64(1458 * time.Hour / time.Millisecond), + Samples: []backfillSample{ + { + Timestamp: 1624463088000, + Value: 1, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"), + }, + { + Timestamp: 1629503088000, + Value: 2, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"), + }, + { + Timestamp: 1629863088000, + Value: 3, + Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"), + }, + }, + }, + }, { ToParse: `# HELP rpc_duration_seconds A summary of the RPC duration in seconds. # TYPE rpc_duration_seconds summary @@ -689,7 +733,7 @@ after_eof 1 2 outputDir := t.TempDir() - err := backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration) + err := backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration, test.Labels) if !test.IsOk { require.Error(t, err, test.Description) diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 4d033c3c09..e713a177fd 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -253,6 +253,7 @@ func main() { importQuiet := importCmd.Flag("quiet", "Do not print created blocks.").Short('q').Bool() maxBlockDuration := importCmd.Flag("max-block-duration", "Maximum duration created blocks may span. Anything less than 2h is ignored.").Hidden().PlaceHolder("").Duration() openMetricsImportCmd := importCmd.Command("openmetrics", "Import samples from OpenMetrics input and produce TSDB blocks. Please refer to the storage docs for more details.") + openMetricsLabels := openMetricsImportCmd.Flag("label", "Label to attach to metrics. Can be specified multiple times. Example --label=label_name=label_value").StringMap() importFilePath := openMetricsImportCmd.Arg("input file", "OpenMetrics file to read samples from.").Required().String() importDBPath := openMetricsImportCmd.Arg("output directory", "Output directory for generated blocks.").Default(defaultDBPath).String() importRulesCmd := importCmd.Command("rules", "Create blocks of data for new recording rules.") @@ -408,7 +409,7 @@ func main() { os.Exit(checkErr(dumpSamples(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsSandboxDirRoot, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics))) // TODO(aSquare14): Work on adding support for custom block size. case openMetricsImportCmd.FullCommand(): - os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration)) + os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration, *openMetricsLabels)) case importRulesCmd.FullCommand(): os.Exit(checkErr(importRules(serverURL, httpRoundTripper, *importRulesStart, *importRulesEnd, *importRulesOutputDir, *importRulesEvalInterval, *maxBlockDuration, *importRulesFiles...))) diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index b85a4fae8b..971ea8ab00 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -823,7 +823,7 @@ func checkErr(err error) int { return 0 } -func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) int { +func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration, customLabels map[string]string) int { inputFile, err := fileutil.OpenMmapFile(path) if err != nil { return checkErr(err) @@ -834,7 +834,7 @@ func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxB return checkErr(fmt.Errorf("create output dir: %w", err)) } - return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration)) + return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration, customLabels)) } func displayHistogram(dataType string, datas []int, total int) { diff --git a/cmd/promtool/tsdb_test.go b/cmd/promtool/tsdb_test.go index d7cc560881..ffc5467b47 100644 --- a/cmd/promtool/tsdb_test.go +++ b/cmd/promtool/tsdb_test.go @@ -186,7 +186,7 @@ func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) { dbDir := t.TempDir() // Import samples from OM format - err = backfill(5000, initialMetrics, dbDir, false, false, 2*time.Hour) + err = backfill(5000, initialMetrics, dbDir, false, false, 2*time.Hour, map[string]string{}) require.NoError(t, err) db, err := tsdb.Open(dbDir, nil, nil, tsdb.DefaultOptions(), nil) require.NoError(t, err) diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index 0c397387ff..6d74200e65 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -641,6 +641,15 @@ Import samples from OpenMetrics input and produce TSDB blocks. Please refer to t +###### Flags + +| Flag | Description | +| --- | --- | +| --label | Label to attach to metrics. Can be specified multiple times. Example --label=label_name=label_value | + + + + ###### Arguments | Argument | Description | Default | Required | From 406bf775aa1f8e319d557165ddf5ab39de0c8a19 Mon Sep 17 00:00:00 2001 From: riskrole Date: Wed, 28 Aug 2024 11:26:57 +0800 Subject: [PATCH 42/83] chore: fix some comments Signed-off-by: riskrole --- model/textparse/protobufparse.go | 4 ++-- rules/alerting_test.go | 2 +- tsdb/compact_test.go | 2 +- tsdb/head_read.go | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index ea3a2e1a34..f56def0128 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -47,7 +47,7 @@ import ( // the re-arrangement work is actually causing problems (which has to be seen), // that expectation needs to be changed. type ProtobufParser struct { - in []byte // The intput to parse. + in []byte // The input to parse. inPos int // Position within the input. metricPos int // Position within Metric slice. // fieldPos is the position within a Summary or (legacy) Histogram. -2 @@ -71,7 +71,7 @@ type ProtobufParser struct { mf *dto.MetricFamily - // Wether to also parse a classic histogram that is also present as a + // Whether to also parse a classic histogram that is also present as a // native histogram. parseClassicHistograms bool diff --git a/rules/alerting_test.go b/rules/alerting_test.go index 5ebd049f66..4063329463 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -648,7 +648,7 @@ func TestAlertingRuleLimit(t *testing.T) { case err != nil: require.EqualError(t, err, test.err) case test.err != "": - t.Errorf("Expected errror %s, got none", test.err) + t.Errorf("Expected error %s, got none", test.err) } } } diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index 0ea155d107..e7998abf7d 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -2018,7 +2018,7 @@ func TestDelayedCompaction(t *testing.T) { // This implies that the compaction delay doesn't block or wait on the initial trigger. // 3 is an arbitrary value because it's difficult to determine the precise value. require.GreaterOrEqual(t, prom_testutil.ToFloat64(db.metrics.compactionsTriggered)-prom_testutil.ToFloat64(db.metrics.compactionsSkipped), 3.0) - // The delay doesn't change the head blocks alignement. + // The delay doesn't change the head blocks alignment. require.Eventually(t, func() bool { return db.head.MinTime() == db.compactor.(*LeveledCompactor).ranges[0]+1 }, 500*time.Millisecond, 10*time.Millisecond) diff --git a/tsdb/head_read.go b/tsdb/head_read.go index 5e3a76273a..24d33ed5c2 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -430,7 +430,7 @@ func (s *memSeries) chunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDi // incremented by 1 when new chunk is created, hence (id - firstChunkID) gives the slice index. // The max index for the s.mmappedChunks slice can be len(s.mmappedChunks)-1, hence if the ix // is >= len(s.mmappedChunks), it represents one of the chunks on s.headChunks linked list. - // The order of elemens is different for slice and linked list. + // The order of elements is different for slice and linked list. // For s.mmappedChunks slice newer chunks are appended to it. // For s.headChunks list newer chunks are prepended to it. // From 3a82cd5a7ed4c5e9b530fafcb779c2e9c81e2aab Mon Sep 17 00:00:00 2001 From: Justin Lei Date: Tue, 27 Aug 2024 23:23:54 -0700 Subject: [PATCH 43/83] Add streaming remote read to ReadClient (#11379) * Add streaming remote read to ReadClient Signed-off-by: Justin Lei * Apply suggestions from code review Co-authored-by: Bartlomiej Plotka Signed-off-by: Justin Lei * Remote read instrumentation tweaks Signed-off-by: Justin Lei * Minor cleanups Signed-off-by: Justin Lei * In-line handleChunkedResponse Signed-off-by: Justin Lei * Fix lints Signed-off-by: Justin Lei * Explicitly call cancel() when needed Signed-off-by: Justin Lei * Update chunkedSeries, chunkedSeriesIterator for new interfaces Signed-off-by: Justin Lei * Adapt remote.chunkedSeries to use prompb.ChunkedSeries Signed-off-by: Justin Lei * Fix lint Signed-off-by: Justin Lei --------- Signed-off-by: Justin Lei Signed-off-by: Justin Lei Co-authored-by: Bartlomiej Plotka --- config/config.go | 18 +- config/config_test.go | 10 +- storage/remote/chunked.go | 4 - storage/remote/client.go | 110 ++++++++---- storage/remote/client_test.go | 229 +++++++++++++++++++++++ storage/remote/codec.go | 223 ++++++++++++++++++++++- storage/remote/codec_test.go | 269 ++++++++++++++++++++++++++++ storage/remote/read.go | 4 +- storage/remote/read_handler_test.go | 4 +- storage/remote/read_test.go | 5 +- storage/remote/storage.go | 1 + web/api/v1/api_test.go | 3 + 12 files changed, 819 insertions(+), 61 deletions(-) diff --git a/config/config.go b/config/config.go index d1e463d86c..c9e8efbf3e 100644 --- a/config/config.go +++ b/config/config.go @@ -221,6 +221,7 @@ var ( // DefaultRemoteReadConfig is the default remote read configuration. DefaultRemoteReadConfig = RemoteReadConfig{ RemoteTimeout: model.Duration(1 * time.Minute), + ChunkedReadLimit: DefaultChunkedReadLimit, HTTPClientConfig: config.DefaultHTTPClientConfig, FilterExternalLabels: true, } @@ -1279,13 +1280,20 @@ type MetadataConfig struct { MaxSamplesPerSend int `yaml:"max_samples_per_send,omitempty"` } +const ( + // DefaultChunkedReadLimit is the default value for the maximum size of the protobuf frame client allows. + // 50MB is the default. This is equivalent to ~100k full XOR chunks and average labelset. + DefaultChunkedReadLimit = 5e+7 +) + // RemoteReadConfig is the configuration for reading from remote storage. type RemoteReadConfig struct { - URL *config.URL `yaml:"url"` - RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"` - Headers map[string]string `yaml:"headers,omitempty"` - ReadRecent bool `yaml:"read_recent,omitempty"` - Name string `yaml:"name,omitempty"` + URL *config.URL `yaml:"url"` + RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"` + ChunkedReadLimit uint64 `yaml:"chunked_read_limit,omitempty"` + Headers map[string]string `yaml:"headers,omitempty"` + ReadRecent bool `yaml:"read_recent,omitempty"` + Name string `yaml:"name,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. diff --git a/config/config_test.go b/config/config_test.go index 774aba1e28..2219061823 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -165,10 +165,11 @@ var expectedConf = &Config{ RemoteReadConfigs: []*RemoteReadConfig{ { - URL: mustParseURL("http://remote1/read"), - RemoteTimeout: model.Duration(1 * time.Minute), - ReadRecent: true, - Name: "default", + URL: mustParseURL("http://remote1/read"), + RemoteTimeout: model.Duration(1 * time.Minute), + ChunkedReadLimit: DefaultChunkedReadLimit, + ReadRecent: true, + Name: "default", HTTPClientConfig: config.HTTPClientConfig{ FollowRedirects: true, EnableHTTP2: false, @@ -178,6 +179,7 @@ var expectedConf = &Config{ { URL: mustParseURL("http://remote3/read"), RemoteTimeout: model.Duration(1 * time.Minute), + ChunkedReadLimit: DefaultChunkedReadLimit, ReadRecent: false, Name: "read_special", RequiredMatchers: model.LabelSet{"job": "special"}, diff --git a/storage/remote/chunked.go b/storage/remote/chunked.go index 96ce483e0c..aa5addd6aa 100644 --- a/storage/remote/chunked.go +++ b/storage/remote/chunked.go @@ -26,10 +26,6 @@ import ( "github.com/gogo/protobuf/proto" ) -// DefaultChunkedReadLimit is the default value for the maximum size of the protobuf frame client allows. -// 50MB is the default. This is equivalent to ~100k full XOR chunks and average labelset. -const DefaultChunkedReadLimit = 5e+7 - // The table gets initialized with sync.Once but may still cause a race // with any other use of the crc32 package anywhere. Thus we initialize it // before. diff --git a/storage/remote/client.go b/storage/remote/client.go index 2a66739ed9..62218cfba9 100644 --- a/storage/remote/client.go +++ b/storage/remote/client.go @@ -16,6 +16,7 @@ package remote import ( "bytes" "context" + "errors" "fmt" "io" "net/http" @@ -36,13 +37,14 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/prompb" + "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote/azuread" "github.com/prometheus/prometheus/storage/remote/googleiam" ) -const maxErrMsgLen = 1024 - const ( + maxErrMsgLen = 1024 + RemoteWriteVersionHeader = "X-Prometheus-Remote-Write-Version" RemoteWriteVersion1HeaderValue = "0.1.0" RemoteWriteVersion20HeaderValue = "2.0.0" @@ -68,9 +70,12 @@ var ( config.RemoteWriteProtoMsgV1: appProtoContentType, // Also application/x-protobuf;proto=prometheus.WriteRequest but simplified for compatibility with 1.x spec. config.RemoteWriteProtoMsgV2: appProtoContentType + ";proto=io.prometheus.write.v2.Request", } -) -var ( + AcceptedResponseTypes = []prompb.ReadRequest_ResponseType{ + prompb.ReadRequest_STREAMED_XOR_CHUNKS, + prompb.ReadRequest_SAMPLES, + } + remoteReadQueriesTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, @@ -78,7 +83,7 @@ var ( Name: "read_queries_total", Help: "The total number of remote read queries.", }, - []string{remoteName, endpoint, "code"}, + []string{remoteName, endpoint, "response_type", "code"}, ) remoteReadQueries = prometheus.NewGaugeVec( prometheus.GaugeOpts{ @@ -94,13 +99,13 @@ var ( Namespace: namespace, Subsystem: subsystem, Name: "read_request_duration_seconds", - Help: "Histogram of the latency for remote read requests.", + Help: "Histogram of the latency for remote read requests. Note that for streamed responses this is only the duration of the initial call and does not include the processing of the stream.", Buckets: append(prometheus.DefBuckets, 25, 60), NativeHistogramBucketFactor: 1.1, NativeHistogramMaxBucketNumber: 100, NativeHistogramMinResetDuration: 1 * time.Hour, }, - []string{remoteName, endpoint}, + []string{remoteName, endpoint, "response_type"}, ) ) @@ -116,10 +121,11 @@ type Client struct { timeout time.Duration retryOnRateLimit bool + chunkedReadLimit uint64 readQueries prometheus.Gauge readQueriesTotal *prometheus.CounterVec - readQueriesDuration prometheus.Observer + readQueriesDuration prometheus.ObserverVec writeProtoMsg config.RemoteWriteProtoMsg writeCompression Compression // Not exposed by ClientConfig for now. @@ -136,12 +142,13 @@ type ClientConfig struct { Headers map[string]string RetryOnRateLimit bool WriteProtoMsg config.RemoteWriteProtoMsg + ChunkedReadLimit uint64 } -// ReadClient uses the SAMPLES method of remote read to read series samples from remote server. -// TODO(bwplotka): Add streamed chunked remote read method as well (https://github.com/prometheus/prometheus/issues/5926). +// ReadClient will request the STREAMED_XOR_CHUNKS method of remote read but can +// also fall back to the SAMPLES method if necessary. type ReadClient interface { - Read(ctx context.Context, query *prompb.Query) (*prompb.QueryResult, error) + Read(ctx context.Context, query *prompb.Query, sortSeries bool) (storage.SeriesSet, error) } // NewReadClient creates a new client for remote read. @@ -162,9 +169,10 @@ func NewReadClient(name string, conf *ClientConfig) (ReadClient, error) { urlString: conf.URL.String(), Client: httpClient, timeout: time.Duration(conf.Timeout), + chunkedReadLimit: conf.ChunkedReadLimit, readQueries: remoteReadQueries.WithLabelValues(name, conf.URL.String()), readQueriesTotal: remoteReadQueriesTotal.MustCurryWith(prometheus.Labels{remoteName: name, endpoint: conf.URL.String()}), - readQueriesDuration: remoteReadQueryDuration.WithLabelValues(name, conf.URL.String()), + readQueriesDuration: remoteReadQueryDuration.MustCurryWith(prometheus.Labels{remoteName: name, endpoint: conf.URL.String()}), }, nil } @@ -278,8 +286,8 @@ func (c *Client) Store(ctx context.Context, req []byte, attempt int) (WriteRespo return WriteResponseStats{}, RecoverableError{err, defaultBackoff} } defer func() { - io.Copy(io.Discard, httpResp.Body) - httpResp.Body.Close() + _, _ = io.Copy(io.Discard, httpResp.Body) + _ = httpResp.Body.Close() }() // TODO(bwplotka): Pass logger and emit debug on error? @@ -329,17 +337,17 @@ func (c *Client) Endpoint() string { return c.urlString } -// Read reads from a remote endpoint. -func (c *Client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryResult, error) { +// Read reads from a remote endpoint. The sortSeries parameter is only respected in the case of a sampled response; +// chunked responses arrive already sorted by the server. +func (c *Client) Read(ctx context.Context, query *prompb.Query, sortSeries bool) (storage.SeriesSet, error) { c.readQueries.Inc() defer c.readQueries.Dec() req := &prompb.ReadRequest{ // TODO: Support batching multiple queries into one read request, // as the protobuf interface allows for it. - Queries: []*prompb.Query{ - query, - }, + Queries: []*prompb.Query{query}, + AcceptedResponseTypes: AcceptedResponseTypes, } data, err := proto.Marshal(req) if err != nil { @@ -358,7 +366,6 @@ func (c *Client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryRe httpReq.Header.Set("X-Prometheus-Remote-Read-Version", "0.1.0") ctx, cancel := context.WithTimeout(ctx, c.timeout) - defer cancel() ctx, span := otel.Tracer("").Start(ctx, "Remote Read", trace.WithSpanKind(trace.SpanKindClient)) defer span.End() @@ -366,24 +373,58 @@ func (c *Client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryRe start := time.Now() httpResp, err := c.Client.Do(httpReq.WithContext(ctx)) if err != nil { + cancel() return nil, fmt.Errorf("error sending request: %w", err) } - defer func() { - io.Copy(io.Discard, httpResp.Body) - httpResp.Body.Close() - }() - c.readQueriesDuration.Observe(time.Since(start).Seconds()) - c.readQueriesTotal.WithLabelValues(strconv.Itoa(httpResp.StatusCode)).Inc() - - compressed, err = io.ReadAll(httpResp.Body) - if err != nil { - return nil, fmt.Errorf("error reading response. HTTP status code: %s: %w", httpResp.Status, err) - } if httpResp.StatusCode/100 != 2 { - return nil, fmt.Errorf("remote server %s returned HTTP status %s: %s", c.urlString, httpResp.Status, strings.TrimSpace(string(compressed))) + // Make an attempt at getting an error message. + body, _ := io.ReadAll(httpResp.Body) + _ = httpResp.Body.Close() + + cancel() + return nil, fmt.Errorf("remote server %s returned http status %s: %s", c.urlString, httpResp.Status, string(body)) } + contentType := httpResp.Header.Get("Content-Type") + + switch { + case strings.HasPrefix(contentType, "application/x-protobuf"): + c.readQueriesDuration.WithLabelValues("sampled").Observe(time.Since(start).Seconds()) + c.readQueriesTotal.WithLabelValues("sampled", strconv.Itoa(httpResp.StatusCode)).Inc() + ss, err := c.handleSampledResponse(req, httpResp, sortSeries) + cancel() + return ss, err + case strings.HasPrefix(contentType, "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse"): + c.readQueriesDuration.WithLabelValues("chunked").Observe(time.Since(start).Seconds()) + + s := NewChunkedReader(httpResp.Body, c.chunkedReadLimit, nil) + return NewChunkedSeriesSet(s, httpResp.Body, query.StartTimestampMs, query.EndTimestampMs, func(err error) { + code := strconv.Itoa(httpResp.StatusCode) + if !errors.Is(err, io.EOF) { + code = "aborted_stream" + } + c.readQueriesTotal.WithLabelValues("chunked", code).Inc() + cancel() + }), nil + default: + c.readQueriesDuration.WithLabelValues("unsupported").Observe(time.Since(start).Seconds()) + c.readQueriesTotal.WithLabelValues("unsupported", strconv.Itoa(httpResp.StatusCode)).Inc() + cancel() + return nil, fmt.Errorf("unsupported content type: %s", contentType) + } +} + +func (c *Client) handleSampledResponse(req *prompb.ReadRequest, httpResp *http.Response, sortSeries bool) (storage.SeriesSet, error) { + compressed, err := io.ReadAll(httpResp.Body) + if err != nil { + return nil, fmt.Errorf("error reading response. HTTP status code: %s: %w", httpResp.Status, err) + } + defer func() { + _, _ = io.Copy(io.Discard, httpResp.Body) + _ = httpResp.Body.Close() + }() + uncompressed, err := snappy.Decode(nil, compressed) if err != nil { return nil, fmt.Errorf("error reading response: %w", err) @@ -399,5 +440,8 @@ func (c *Client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryRe return nil, fmt.Errorf("responses: want %d, got %d", len(req.Queries), len(resp.Results)) } - return resp.Results[0], nil + // This client does not batch queries so there's always only 1 result. + res := resp.Results[0] + + return FromQueryResult(sortSeries, res), nil } diff --git a/storage/remote/client_test.go b/storage/remote/client_test.go index 9184ce1008..c8b3d487e7 100644 --- a/storage/remote/client_test.go +++ b/storage/remote/client_test.go @@ -23,9 +23,15 @@ import ( "testing" "time" + "github.com/gogo/protobuf/proto" + "github.com/golang/snappy" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/prompb" + "github.com/prometheus/prometheus/tsdb/chunkenc" ) var longErrMessage = strings.Repeat("error message", maxErrMsgLen) @@ -208,3 +214,226 @@ func TestClientCustomHeaders(t *testing.T) { require.True(t, called, "The remote server wasn't called") } + +func TestReadClient(t *testing.T) { + tests := []struct { + name string + query *prompb.Query + httpHandler http.HandlerFunc + expectedLabels []map[string]string + expectedSamples [][]model.SamplePair + expectedErrorContains string + sortSeries bool + }{ + { + name: "sorted sampled response", + httpHandler: sampledResponseHTTPHandler(t), + expectedLabels: []map[string]string{ + {"foo1": "bar"}, + {"foo2": "bar"}, + }, + expectedSamples: [][]model.SamplePair{ + { + {Timestamp: model.Time(0), Value: model.SampleValue(3)}, + {Timestamp: model.Time(5), Value: model.SampleValue(4)}, + }, + { + {Timestamp: model.Time(0), Value: model.SampleValue(1)}, + {Timestamp: model.Time(5), Value: model.SampleValue(2)}, + }, + }, + expectedErrorContains: "", + sortSeries: true, + }, + { + name: "unsorted sampled response", + httpHandler: sampledResponseHTTPHandler(t), + expectedLabels: []map[string]string{ + {"foo2": "bar"}, + {"foo1": "bar"}, + }, + expectedSamples: [][]model.SamplePair{ + { + {Timestamp: model.Time(0), Value: model.SampleValue(1)}, + {Timestamp: model.Time(5), Value: model.SampleValue(2)}, + }, + { + {Timestamp: model.Time(0), Value: model.SampleValue(3)}, + {Timestamp: model.Time(5), Value: model.SampleValue(4)}, + }, + }, + expectedErrorContains: "", + sortSeries: false, + }, + { + name: "chunked response", + query: &prompb.Query{ + StartTimestampMs: 4000, + EndTimestampMs: 12000, + }, + httpHandler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse") + + flusher, ok := w.(http.Flusher) + require.True(t, ok) + + cw := NewChunkedWriter(w, flusher) + l := []prompb.Label{ + {Name: "foo", Value: "bar"}, + } + + chunks := buildTestChunks(t) + for i, c := range chunks { + cSeries := prompb.ChunkedSeries{Labels: l, Chunks: []prompb.Chunk{c}} + readResp := prompb.ChunkedReadResponse{ + ChunkedSeries: []*prompb.ChunkedSeries{&cSeries}, + QueryIndex: int64(i), + } + + b, err := proto.Marshal(&readResp) + require.NoError(t, err) + + _, err = cw.Write(b) + require.NoError(t, err) + } + }), + expectedLabels: []map[string]string{ + {"foo": "bar"}, + {"foo": "bar"}, + {"foo": "bar"}, + }, + // This is the output of buildTestChunks minus the samples outside the query range. + expectedSamples: [][]model.SamplePair{ + { + {Timestamp: model.Time(4000), Value: model.SampleValue(4)}, + }, + { + {Timestamp: model.Time(5000), Value: model.SampleValue(1)}, + {Timestamp: model.Time(6000), Value: model.SampleValue(2)}, + {Timestamp: model.Time(7000), Value: model.SampleValue(3)}, + {Timestamp: model.Time(8000), Value: model.SampleValue(4)}, + {Timestamp: model.Time(9000), Value: model.SampleValue(5)}, + }, + { + {Timestamp: model.Time(10000), Value: model.SampleValue(2)}, + {Timestamp: model.Time(11000), Value: model.SampleValue(3)}, + {Timestamp: model.Time(12000), Value: model.SampleValue(4)}, + }, + }, + expectedErrorContains: "", + }, + { + name: "unsupported content type", + httpHandler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "foobar") + }), + expectedErrorContains: "unsupported content type", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + server := httptest.NewServer(test.httpHandler) + defer server.Close() + + u, err := url.Parse(server.URL) + require.NoError(t, err) + + conf := &ClientConfig{ + URL: &config_util.URL{URL: u}, + Timeout: model.Duration(5 * time.Second), + ChunkedReadLimit: config.DefaultChunkedReadLimit, + } + c, err := NewReadClient("test", conf) + require.NoError(t, err) + + query := &prompb.Query{} + if test.query != nil { + query = test.query + } + + ss, err := c.Read(context.Background(), query, test.sortSeries) + if test.expectedErrorContains != "" { + require.ErrorContains(t, err, test.expectedErrorContains) + return + } + + require.NoError(t, err) + + i := 0 + + for ss.Next() { + require.NoError(t, ss.Err()) + s := ss.At() + + l := s.Labels() + require.Len(t, test.expectedLabels[i], l.Len()) + for k, v := range test.expectedLabels[i] { + require.True(t, l.Has(k)) + require.Equal(t, v, l.Get(k)) + } + + it := s.Iterator(nil) + j := 0 + + for valType := it.Next(); valType != chunkenc.ValNone; valType = it.Next() { + require.NoError(t, it.Err()) + + ts, v := it.At() + expectedSample := test.expectedSamples[i][j] + + require.Equal(t, int64(expectedSample.Timestamp), ts) + require.Equal(t, float64(expectedSample.Value), v) + + j++ + } + + require.Len(t, test.expectedSamples[i], j) + + i++ + } + + require.NoError(t, ss.Err()) + }) + } +} + +func sampledResponseHTTPHandler(t *testing.T) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/x-protobuf") + + resp := prompb.ReadResponse{ + Results: []*prompb.QueryResult{ + { + Timeseries: []*prompb.TimeSeries{ + { + Labels: []prompb.Label{ + {Name: "foo2", Value: "bar"}, + }, + Samples: []prompb.Sample{ + {Value: float64(1), Timestamp: int64(0)}, + {Value: float64(2), Timestamp: int64(5)}, + }, + Exemplars: []prompb.Exemplar{}, + }, + { + Labels: []prompb.Label{ + {Name: "foo1", Value: "bar"}, + }, + Samples: []prompb.Sample{ + {Value: float64(3), Timestamp: int64(0)}, + {Value: float64(4), Timestamp: int64(5)}, + }, + Exemplars: []prompb.Exemplar{}, + }, + }, + }, + }, + } + b, err := proto.Marshal(&resp) + require.NoError(t, err) + + _, err = w.Write(snappy.Encode(nil, b)) + require.NoError(t, err) + } +} diff --git a/storage/remote/codec.go b/storage/remote/codec.go index c9220ca42d..80bb811500 100644 --- a/storage/remote/codec.go +++ b/storage/remote/codec.go @@ -540,6 +540,220 @@ func (c *concreteSeriesIterator) Err() error { return nil } +// chunkedSeriesSet implements storage.SeriesSet. +type chunkedSeriesSet struct { + chunkedReader *ChunkedReader + respBody io.ReadCloser + mint, maxt int64 + cancel func(error) + + current storage.Series + err error +} + +func NewChunkedSeriesSet(chunkedReader *ChunkedReader, respBody io.ReadCloser, mint, maxt int64, cancel func(error)) storage.SeriesSet { + return &chunkedSeriesSet{ + chunkedReader: chunkedReader, + respBody: respBody, + mint: mint, + maxt: maxt, + cancel: cancel, + } +} + +// Next return true if there is a next series and false otherwise. It will +// block until the next series is available. +func (s *chunkedSeriesSet) Next() bool { + res := &prompb.ChunkedReadResponse{} + + err := s.chunkedReader.NextProto(res) + if err != nil { + if !errors.Is(err, io.EOF) { + s.err = err + _, _ = io.Copy(io.Discard, s.respBody) + } + + _ = s.respBody.Close() + s.cancel(err) + + return false + } + + s.current = &chunkedSeries{ + ChunkedSeries: prompb.ChunkedSeries{ + Labels: res.ChunkedSeries[0].Labels, + Chunks: res.ChunkedSeries[0].Chunks, + }, + mint: s.mint, + maxt: s.maxt, + } + + return true +} + +func (s *chunkedSeriesSet) At() storage.Series { + return s.current +} + +func (s *chunkedSeriesSet) Err() error { + return s.err +} + +func (s *chunkedSeriesSet) Warnings() annotations.Annotations { + return nil +} + +type chunkedSeries struct { + prompb.ChunkedSeries + mint, maxt int64 +} + +var _ storage.Series = &chunkedSeries{} + +func (s *chunkedSeries) Labels() labels.Labels { + b := labels.NewScratchBuilder(0) + return s.ToLabels(&b, nil) +} + +func (s *chunkedSeries) Iterator(it chunkenc.Iterator) chunkenc.Iterator { + csIt, ok := it.(*chunkedSeriesIterator) + if ok { + csIt.reset(s.Chunks, s.mint, s.maxt) + return csIt + } + return newChunkedSeriesIterator(s.Chunks, s.mint, s.maxt) +} + +type chunkedSeriesIterator struct { + chunks []prompb.Chunk + idx int + cur chunkenc.Iterator + valType chunkenc.ValueType + mint, maxt int64 + + err error +} + +var _ chunkenc.Iterator = &chunkedSeriesIterator{} + +func newChunkedSeriesIterator(chunks []prompb.Chunk, mint, maxt int64) *chunkedSeriesIterator { + it := &chunkedSeriesIterator{} + it.reset(chunks, mint, maxt) + return it +} + +func (it *chunkedSeriesIterator) Next() chunkenc.ValueType { + if it.err != nil { + return chunkenc.ValNone + } + if len(it.chunks) == 0 { + return chunkenc.ValNone + } + + for it.valType = it.cur.Next(); it.valType != chunkenc.ValNone; it.valType = it.cur.Next() { + atT := it.AtT() + if atT > it.maxt { + it.chunks = nil // Exhaust this iterator so follow-up calls to Next or Seek return fast. + return chunkenc.ValNone + } + if atT >= it.mint { + return it.valType + } + } + + if it.idx >= len(it.chunks)-1 { + it.valType = chunkenc.ValNone + } else { + it.idx++ + it.resetIterator() + it.valType = it.Next() + } + + return it.valType +} + +func (it *chunkedSeriesIterator) Seek(t int64) chunkenc.ValueType { + if it.err != nil { + return chunkenc.ValNone + } + if len(it.chunks) == 0 { + return chunkenc.ValNone + } + + startIdx := it.idx + it.idx += sort.Search(len(it.chunks)-startIdx, func(i int) bool { + return it.chunks[startIdx+i].MaxTimeMs >= t + }) + if it.idx > startIdx { + it.resetIterator() + } else { + ts := it.cur.AtT() + if ts >= t { + return it.valType + } + } + + for it.valType = it.cur.Next(); it.valType != chunkenc.ValNone; it.valType = it.cur.Next() { + ts := it.cur.AtT() + if ts > it.maxt { + it.chunks = nil // Exhaust this iterator so follow-up calls to Next or Seek return fast. + return chunkenc.ValNone + } + if ts >= t && ts >= it.mint { + return it.valType + } + } + + it.valType = chunkenc.ValNone + return it.valType +} + +func (it *chunkedSeriesIterator) resetIterator() { + if it.idx < len(it.chunks) { + chunk := it.chunks[it.idx] + + decodedChunk, err := chunkenc.FromData(chunkenc.Encoding(chunk.Type), chunk.Data) + if err != nil { + it.err = err + return + } + + it.cur = decodedChunk.Iterator(nil) + } else { + it.cur = chunkenc.NewNopIterator() + } +} + +func (it *chunkedSeriesIterator) reset(chunks []prompb.Chunk, mint, maxt int64) { + it.chunks = chunks + it.mint = mint + it.maxt = maxt + it.idx = 0 + if len(chunks) > 0 { + it.resetIterator() + } +} + +func (it *chunkedSeriesIterator) At() (ts int64, v float64) { + return it.cur.At() +} + +func (it *chunkedSeriesIterator) AtHistogram(h *histogram.Histogram) (int64, *histogram.Histogram) { + return it.cur.AtHistogram(h) +} + +func (it *chunkedSeriesIterator) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { + return it.cur.AtFloatHistogram(fh) +} + +func (it *chunkedSeriesIterator) AtT() int64 { + return it.cur.AtT() +} + +func (it *chunkedSeriesIterator) Err() error { + return it.err +} + // validateLabelsAndMetricName validates the label names/values and metric names returned from remote read, // also making sure that there are no labels with duplicate names. func validateLabelsAndMetricName(ls []prompb.Label) error { @@ -612,15 +826,6 @@ func FromLabelMatchers(matchers []*prompb.LabelMatcher) ([]*labels.Matcher, erro return result, nil } -// LabelProtosToMetric unpack a []*prompb.Label to a model.Metric. -func LabelProtosToMetric(labelPairs []*prompb.Label) model.Metric { - metric := make(model.Metric, len(labelPairs)) - for _, l := range labelPairs { - metric[model.LabelName(l.Name)] = model.LabelValue(l.Value) - } - return metric -} - // DecodeWriteRequest from an io.Reader into a prompb.WriteRequest, handling // snappy decompression. // Used also by documentation/examples/remote_storage. diff --git a/storage/remote/codec_test.go b/storage/remote/codec_test.go index 279d10e41b..404f1add75 100644 --- a/storage/remote/codec_test.go +++ b/storage/remote/codec_test.go @@ -16,6 +16,7 @@ package remote import ( "bytes" "fmt" + "io" "sync" "testing" @@ -24,6 +25,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/metadata" @@ -705,3 +707,270 @@ func (c *mockChunkIterator) Next() bool { func (c *mockChunkIterator) Err() error { return nil } + +func TestChunkedSeriesIterator(t *testing.T) { + t.Run("happy path", func(t *testing.T) { + chks := buildTestChunks(t) + + it := newChunkedSeriesIterator(chks, 2000, 12000) + + require.NoError(t, it.err) + require.NotNil(t, it.cur) + + // Initial next; advance to first valid sample of first chunk. + res := it.Next() + require.Equal(t, chunkenc.ValFloat, res) + require.NoError(t, it.Err()) + + ts, v := it.At() + require.Equal(t, int64(2000), ts) + require.Equal(t, float64(2), v) + + // Next to the second sample of the first chunk. + res = it.Next() + require.Equal(t, chunkenc.ValFloat, res) + require.NoError(t, it.Err()) + + ts, v = it.At() + require.Equal(t, int64(3000), ts) + require.Equal(t, float64(3), v) + + // Attempt to seek to the first sample of the first chunk (should return current sample). + res = it.Seek(0) + require.Equal(t, chunkenc.ValFloat, res) + + ts, v = it.At() + require.Equal(t, int64(3000), ts) + require.Equal(t, float64(3), v) + + // Seek to the end of the first chunk. + res = it.Seek(4000) + require.Equal(t, chunkenc.ValFloat, res) + + ts, v = it.At() + require.Equal(t, int64(4000), ts) + require.Equal(t, float64(4), v) + + // Next to the first sample of the second chunk. + res = it.Next() + require.Equal(t, chunkenc.ValFloat, res) + require.NoError(t, it.Err()) + + ts, v = it.At() + require.Equal(t, int64(5000), ts) + require.Equal(t, float64(1), v) + + // Seek to the second sample of the third chunk. + res = it.Seek(10999) + require.Equal(t, chunkenc.ValFloat, res) + require.NoError(t, it.Err()) + + ts, v = it.At() + require.Equal(t, int64(11000), ts) + require.Equal(t, float64(3), v) + + // Attempt to seek to something past the last sample (should return false and exhaust the iterator). + res = it.Seek(99999) + require.Equal(t, chunkenc.ValNone, res) + require.NoError(t, it.Err()) + + // Attempt to next past the last sample (should return false as the iterator is exhausted). + res = it.Next() + require.Equal(t, chunkenc.ValNone, res) + require.NoError(t, it.Err()) + }) + + t.Run("invalid chunk encoding error", func(t *testing.T) { + chks := buildTestChunks(t) + + // Set chunk type to an invalid value. + chks[0].Type = 8 + + it := newChunkedSeriesIterator(chks, 0, 14000) + + res := it.Next() + require.Equal(t, chunkenc.ValNone, res) + + res = it.Seek(1000) + require.Equal(t, chunkenc.ValNone, res) + + require.ErrorContains(t, it.err, "invalid chunk encoding") + require.Nil(t, it.cur) + }) + + t.Run("empty chunks", func(t *testing.T) { + emptyChunks := make([]prompb.Chunk, 0) + + it1 := newChunkedSeriesIterator(emptyChunks, 0, 1000) + require.Equal(t, chunkenc.ValNone, it1.Next()) + require.Equal(t, chunkenc.ValNone, it1.Seek(1000)) + require.NoError(t, it1.Err()) + + var nilChunks []prompb.Chunk + + it2 := newChunkedSeriesIterator(nilChunks, 0, 1000) + require.Equal(t, chunkenc.ValNone, it2.Next()) + require.Equal(t, chunkenc.ValNone, it2.Seek(1000)) + require.NoError(t, it2.Err()) + }) +} + +func TestChunkedSeries(t *testing.T) { + t.Run("happy path", func(t *testing.T) { + chks := buildTestChunks(t) + + s := chunkedSeries{ + ChunkedSeries: prompb.ChunkedSeries{ + Labels: []prompb.Label{ + {Name: "foo", Value: "bar"}, + {Name: "asdf", Value: "zxcv"}, + }, + Chunks: chks, + }, + } + + require.Equal(t, labels.FromStrings("asdf", "zxcv", "foo", "bar"), s.Labels()) + + it := s.Iterator(nil) + res := it.Next() // Behavior is undefined w/o the initial call to Next. + + require.Equal(t, chunkenc.ValFloat, res) + require.NoError(t, it.Err()) + + ts, v := it.At() + require.Equal(t, int64(0), ts) + require.Equal(t, float64(0), v) + }) +} + +func TestChunkedSeriesSet(t *testing.T) { + t.Run("happy path", func(t *testing.T) { + buf := &bytes.Buffer{} + flusher := &mockFlusher{} + + w := NewChunkedWriter(buf, flusher) + r := NewChunkedReader(buf, config.DefaultChunkedReadLimit, nil) + + chks := buildTestChunks(t) + l := []prompb.Label{ + {Name: "foo", Value: "bar"}, + } + + for i, c := range chks { + cSeries := prompb.ChunkedSeries{Labels: l, Chunks: []prompb.Chunk{c}} + readResp := prompb.ChunkedReadResponse{ + ChunkedSeries: []*prompb.ChunkedSeries{&cSeries}, + QueryIndex: int64(i), + } + + b, err := proto.Marshal(&readResp) + require.NoError(t, err) + + _, err = w.Write(b) + require.NoError(t, err) + } + + ss := NewChunkedSeriesSet(r, io.NopCloser(buf), 0, 14000, func(error) {}) + require.NoError(t, ss.Err()) + require.Nil(t, ss.Warnings()) + + res := ss.Next() + require.True(t, res) + require.NoError(t, ss.Err()) + + s := ss.At() + require.Equal(t, 1, s.Labels().Len()) + require.True(t, s.Labels().Has("foo")) + require.Equal(t, "bar", s.Labels().Get("foo")) + + it := s.Iterator(nil) + it.Next() + ts, v := it.At() + require.Equal(t, int64(0), ts) + require.Equal(t, float64(0), v) + + numResponses := 1 + for ss.Next() { + numResponses++ + } + require.Equal(t, numTestChunks, numResponses) + require.NoError(t, ss.Err()) + }) + + t.Run("chunked reader error", func(t *testing.T) { + buf := &bytes.Buffer{} + flusher := &mockFlusher{} + + w := NewChunkedWriter(buf, flusher) + r := NewChunkedReader(buf, config.DefaultChunkedReadLimit, nil) + + chks := buildTestChunks(t) + l := []prompb.Label{ + {Name: "foo", Value: "bar"}, + } + + for i, c := range chks { + cSeries := prompb.ChunkedSeries{Labels: l, Chunks: []prompb.Chunk{c}} + readResp := prompb.ChunkedReadResponse{ + ChunkedSeries: []*prompb.ChunkedSeries{&cSeries}, + QueryIndex: int64(i), + } + + b, err := proto.Marshal(&readResp) + require.NoError(t, err) + + b[0] = 0xFF // Corruption! + + _, err = w.Write(b) + require.NoError(t, err) + } + + ss := NewChunkedSeriesSet(r, io.NopCloser(buf), 0, 14000, func(error) {}) + require.NoError(t, ss.Err()) + require.Nil(t, ss.Warnings()) + + res := ss.Next() + require.False(t, res) + require.ErrorContains(t, ss.Err(), "proto: illegal wireType 7") + }) +} + +// mockFlusher implements http.Flusher. +type mockFlusher struct{} + +func (f *mockFlusher) Flush() {} + +const ( + numTestChunks = 3 + numSamplesPerTestChunk = 5 +) + +func buildTestChunks(t *testing.T) []prompb.Chunk { + startTime := int64(0) + chks := make([]prompb.Chunk, 0, numTestChunks) + + time := startTime + + for i := 0; i < numTestChunks; i++ { + c := chunkenc.NewXORChunk() + + a, err := c.Appender() + require.NoError(t, err) + + minTimeMs := time + + for j := 0; j < numSamplesPerTestChunk; j++ { + a.Append(time, float64(i+j)) + time += int64(1000) + } + + chks = append(chks, prompb.Chunk{ + MinTimeMs: minTimeMs, + MaxTimeMs: time, + Type: prompb.Chunk_XOR, + Data: c.Bytes(), + }) + } + + return chks +} diff --git a/storage/remote/read.go b/storage/remote/read.go index e54b14f1e3..2ec48784dc 100644 --- a/storage/remote/read.go +++ b/storage/remote/read.go @@ -165,11 +165,11 @@ func (q *querier) Select(ctx context.Context, sortSeries bool, hints *storage.Se return storage.ErrSeriesSet(fmt.Errorf("toQuery: %w", err)) } - res, err := q.client.Read(ctx, query) + res, err := q.client.Read(ctx, query, sortSeries) if err != nil { return storage.ErrSeriesSet(fmt.Errorf("remote_read: %w", err)) } - return newSeriesSetFilter(FromQueryResult(sortSeries, res), added) + return newSeriesSetFilter(res, added) } // addExternalLabels adds matchers for each external label. External labels diff --git a/storage/remote/read_handler_test.go b/storage/remote/read_handler_test.go index a681872687..4cd4647e72 100644 --- a/storage/remote/read_handler_test.go +++ b/storage/remote/read_handler_test.go @@ -179,7 +179,7 @@ func BenchmarkStreamReadEndpoint(b *testing.B) { require.Equal(b, 2, recorder.Code/100) var results []*prompb.ChunkedReadResponse - stream := NewChunkedReader(recorder.Result().Body, DefaultChunkedReadLimit, nil) + stream := NewChunkedReader(recorder.Result().Body, config.DefaultChunkedReadLimit, nil) for { res := &prompb.ChunkedReadResponse{} @@ -280,7 +280,7 @@ func TestStreamReadEndpoint(t *testing.T) { require.Equal(t, "", recorder.Result().Header.Get("Content-Encoding")) var results []*prompb.ChunkedReadResponse - stream := NewChunkedReader(recorder.Result().Body, DefaultChunkedReadLimit, nil) + stream := NewChunkedReader(recorder.Result().Body, config.DefaultChunkedReadLimit, nil) for { res := &prompb.ChunkedReadResponse{} err := stream.NextProto(res) diff --git a/storage/remote/read_test.go b/storage/remote/read_test.go index 357bdba1f5..d63cefc3fe 100644 --- a/storage/remote/read_test.go +++ b/storage/remote/read_test.go @@ -27,6 +27,7 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/prompb" + "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/util/annotations" "github.com/prometheus/prometheus/util/testutil" ) @@ -198,7 +199,7 @@ type mockedRemoteClient struct { b labels.ScratchBuilder } -func (c *mockedRemoteClient) Read(_ context.Context, query *prompb.Query) (*prompb.QueryResult, error) { +func (c *mockedRemoteClient) Read(_ context.Context, query *prompb.Query, sortSeries bool) (storage.SeriesSet, error) { if c.got != nil { return nil, fmt.Errorf("expected only one call to remote client got: %v", query) } @@ -227,7 +228,7 @@ func (c *mockedRemoteClient) Read(_ context.Context, query *prompb.Query) (*prom q.Timeseries = append(q.Timeseries, &prompb.TimeSeries{Labels: s.Labels}) } } - return q, nil + return FromQueryResult(sortSeries, q), nil } func (c *mockedRemoteClient) reset() { diff --git a/storage/remote/storage.go b/storage/remote/storage.go index afa2d411a9..05634f1798 100644 --- a/storage/remote/storage.go +++ b/storage/remote/storage.go @@ -115,6 +115,7 @@ func (s *Storage) ApplyConfig(conf *config.Config) error { c, err := NewReadClient(name, &ClientConfig{ URL: rrConf.URL, Timeout: rrConf.RemoteTimeout, + ChunkedReadLimit: rrConf.ChunkedReadLimit, HTTPClientConfig: rrConf.HTTPClientConfig, Headers: rrConf.Headers, }) diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index ba38ddc978..261ed6b61f 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -1074,6 +1074,9 @@ func setupRemote(s storage.Storage) *httptest.Server { } } + w.Header().Set("Content-Type", "application/x-protobuf") + w.Header().Set("Content-Encoding", "snappy") + if err := remote.EncodeReadResponse(&resp, w); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return From 9da75328ea76f4ebb9a02009b93331a8374f38cf Mon Sep 17 00:00:00 2001 From: Owen Williams Date: Wed, 28 Aug 2024 11:15:42 -0400 Subject: [PATCH 44/83] fix(utf8): ensure correct validation when legacy mode turned on (#14736) fix(utf8): ensure correct validation when legacy mode turned on This depends on the included update of the prometheus/common dependency. --------- Signed-off-by: Owen Williams --- go.mod | 4 +- go.sum | 8 ++-- model/labels/labels_common.go | 19 ++++++-- model/labels/labels_test.go | 78 ++++++++++++++++++++++++++++++++- promql/parser/printer.go | 2 +- scrape/scrape.go | 8 +++- scrape/scrape_test.go | 37 ++++++++++++++++ storage/remote/write_handler.go | 5 ++- 8 files changed, 146 insertions(+), 15 deletions(-) diff --git a/go.mod b/go.mod index 50d560bc3a..af327c64ad 100644 --- a/go.mod +++ b/go.mod @@ -52,9 +52,9 @@ require ( github.com/oklog/ulid v1.3.1 github.com/ovh/go-ovh v1.6.0 github.com/prometheus/alertmanager v0.27.0 - github.com/prometheus/client_golang v1.19.1 + github.com/prometheus/client_golang v1.20.0 github.com/prometheus/client_model v0.6.1 - github.com/prometheus/common v0.55.0 + github.com/prometheus/common v0.56.0 github.com/prometheus/common/assets v0.2.0 github.com/prometheus/common/sigv4 v0.1.0 github.com/prometheus/exporter-toolkit v0.11.0 diff --git a/go.sum b/go.sum index bd4aa4f6b3..933ef94201 100644 --- a/go.sum +++ b/go.sum @@ -608,8 +608,8 @@ github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeD github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= -github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI= +github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -625,8 +625,8 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= -github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/common v0.56.0 h1:UffReloqkBtvtQEYDg2s+uDPGRrJyC6vZWPGXf6OhPY= +github.com/prometheus/common v0.56.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI= github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM= github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= diff --git a/model/labels/labels_common.go b/model/labels/labels_common.go index 6db86b03c7..d7bdc1e076 100644 --- a/model/labels/labels_common.go +++ b/model/labels/labels_common.go @@ -95,12 +95,23 @@ func (ls *Labels) UnmarshalYAML(unmarshal func(interface{}) error) error { } // IsValid checks if the metric name or label names are valid. -func (ls Labels) IsValid() bool { +func (ls Labels) IsValid(validationScheme model.ValidationScheme) bool { err := ls.Validate(func(l Label) error { - if l.Name == model.MetricNameLabel && !model.IsValidMetricName(model.LabelValue(l.Value)) { - return strconv.ErrSyntax + if l.Name == model.MetricNameLabel { + // If the default validation scheme has been overridden with legacy mode, + // we need to call the special legacy validation checker. + if validationScheme == model.LegacyValidation && model.NameValidationScheme == model.UTF8Validation && !model.IsValidLegacyMetricName(string(model.LabelValue(l.Value))) { + return strconv.ErrSyntax + } + if !model.IsValidMetricName(model.LabelValue(l.Value)) { + return strconv.ErrSyntax + } } - if !model.LabelName(l.Name).IsValid() || !model.LabelValue(l.Value).IsValid() { + if validationScheme == model.LegacyValidation && model.NameValidationScheme == model.UTF8Validation { + if !model.LabelName(l.Name).IsValidLegacy() || !model.LabelValue(l.Value).IsValid() { + return strconv.ErrSyntax + } + } else if !model.LabelName(l.Name).IsValid() || !model.LabelValue(l.Value).IsValid() { return strconv.ErrSyntax } return nil diff --git a/model/labels/labels_test.go b/model/labels/labels_test.go index d8910cdc85..9208908311 100644 --- a/model/labels/labels_test.go +++ b/model/labels/labels_test.go @@ -21,6 +21,7 @@ import ( "strings" "testing" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) @@ -272,11 +273,86 @@ func TestLabels_IsValid(t *testing.T) { }, } { t.Run("", func(t *testing.T) { - require.Equal(t, test.expected, test.input.IsValid()) + require.Equal(t, test.expected, test.input.IsValid(model.LegacyValidation)) }) } } +func TestLabels_ValidationModes(t *testing.T) { + for _, test := range []struct { + input Labels + globalMode model.ValidationScheme + callMode model.ValidationScheme + expected bool + }{ + { + input: FromStrings( + "__name__", "test.metric", + "hostname", "localhost", + "job", "check", + ), + globalMode: model.UTF8Validation, + callMode: model.UTF8Validation, + expected: true, + }, + { + input: FromStrings( + "__name__", "test", + "\xc5 bad utf8", "localhost", + "job", "check", + ), + globalMode: model.UTF8Validation, + callMode: model.UTF8Validation, + expected: false, + }, + { + // Setting the common model to legacy validation and then trying to check for UTF-8 on a + // per-call basis is not supported. + input: FromStrings( + "__name__", "test.utf8.metric", + "hostname", "localhost", + "job", "check", + ), + globalMode: model.LegacyValidation, + callMode: model.UTF8Validation, + expected: false, + }, + { + input: FromStrings( + "__name__", "test", + "hostname", "localhost", + "job", "check", + ), + globalMode: model.LegacyValidation, + callMode: model.LegacyValidation, + expected: true, + }, + { + input: FromStrings( + "__name__", "test.utf8.metric", + "hostname", "localhost", + "job", "check", + ), + globalMode: model.UTF8Validation, + callMode: model.LegacyValidation, + expected: false, + }, + { + input: FromStrings( + "__name__", "test", + "host.name", "localhost", + "job", "check", + ), + globalMode: model.UTF8Validation, + callMode: model.LegacyValidation, + expected: false, + }, + } { + model.NameValidationScheme = test.globalMode + require.Equal(t, test.expected, test.input.IsValid(test.callMode)) + } +} + func TestLabels_Equal(t *testing.T) { labels := FromStrings( "aaa", "111", diff --git a/promql/parser/printer.go b/promql/parser/printer.go index 5613956f7a..69f5f082d6 100644 --- a/promql/parser/printer.go +++ b/promql/parser/printer.go @@ -88,7 +88,7 @@ func (node *AggregateExpr) getAggOpStr() string { func joinLabels(ss []string) string { for i, s := range ss { // If the label is already quoted, don't quote it again. - if s[0] != '"' && s[0] != '\'' && s[0] != '`' && !model.IsValidLegacyMetricName(model.LabelValue(s)) { + if s[0] != '"' && s[0] != '\'' && s[0] != '`' && !model.IsValidLegacyMetricName(string(model.LabelValue(s))) { ss[i] = fmt.Sprintf("\"%s\"", s) } } diff --git a/scrape/scrape.go b/scrape/scrape.go index 222cc62f93..5fcd623c0d 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -111,6 +111,7 @@ type scrapeLoopOptions struct { interval time.Duration timeout time.Duration scrapeClassicHistograms bool + validationScheme model.ValidationScheme mrc []*relabel.Config cache *scrapeCache @@ -186,6 +187,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed options.PassMetadataInContext, metrics, options.skipOffsetting, + opts.validationScheme, ) } sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit)) @@ -346,6 +348,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { cache: cache, interval: interval, timeout: timeout, + validationScheme: validationScheme, }) ) if err != nil { @@ -853,6 +856,7 @@ type scrapeLoop struct { interval time.Duration timeout time.Duration scrapeClassicHistograms bool + validationScheme model.ValidationScheme // Feature flagged options. enableNativeHistogramIngestion bool @@ -1160,6 +1164,7 @@ func newScrapeLoop(ctx context.Context, passMetadataInContext bool, metrics *scrapeMetrics, skipOffsetting bool, + validationScheme model.ValidationScheme, ) *scrapeLoop { if l == nil { l = log.NewNopLogger() @@ -1211,6 +1216,7 @@ func newScrapeLoop(ctx context.Context, appendMetadataToWAL: appendMetadataToWAL, metrics: metrics, skipOffsetting: skipOffsetting, + validationScheme: validationScheme, } sl.ctx, sl.cancel = context.WithCancel(ctx) @@ -1631,7 +1637,7 @@ loop: err = errNameLabelMandatory break loop } - if !lset.IsValid() { + if !lset.IsValid(sl.validationScheme) { err = fmt.Errorf("invalid metric name or label names: %s", lset.String()) break loop } diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 9b30615219..b703f21d46 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -684,6 +684,7 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app false, newTestScrapeMetrics(t), false, + model.LegacyValidation, ) } @@ -826,6 +827,7 @@ func TestScrapeLoopRun(t *testing.T) { false, scrapeMetrics, false, + model.LegacyValidation, ) // The loop must terminate during the initial offset if the context @@ -970,6 +972,7 @@ func TestScrapeLoopMetadata(t *testing.T) { false, scrapeMetrics, false, + model.LegacyValidation, ) defer cancel() @@ -1065,6 +1068,40 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { require.Equal(t, 0, seriesAdded) } +func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) { + // Test that scrapes fail when default validation is utf8 but scrape config is + // legacy. + model.NameValidationScheme = model.UTF8Validation + defer func() { + model.NameValidationScheme = model.LegacyValidation + }() + s := teststorage.New(t) + defer s.Close() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sl := newBasicScrapeLoop(t, ctx, &testScraper{}, s.Appender, 0) + sl.validationScheme = model.LegacyValidation + + slApp := sl.appender(ctx) + total, added, seriesAdded, err := sl.append(slApp, []byte("{\"test.metric\"} 1\n"), "", time.Time{}) + require.ErrorContains(t, err, "invalid metric name or label names") + require.NoError(t, slApp.Rollback()) + require.Equal(t, 1, total) + require.Equal(t, 0, added) + require.Equal(t, 0, seriesAdded) + + // When scrapeloop has validation set to UTF-8, the metric is allowed. + sl.validationScheme = model.UTF8Validation + + slApp = sl.appender(ctx) + total, added, seriesAdded, err = sl.append(slApp, []byte("{\"test.metric\"} 1\n"), "", time.Time{}) + require.NoError(t, err) + require.Equal(t, 1, total) + require.Equal(t, 1, added) + require.Equal(t, 1, seriesAdded) +} + func makeTestMetrics(n int) []byte { // Construct a metrics string to parse sb := bytes.Buffer{} diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index 9ea3f2bf93..58fb668cc1 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -28,6 +28,7 @@ import ( "github.com/golang/snappy" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" @@ -239,7 +240,7 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err // TODO(bwplotka): Even as per 1.0 spec, this should be a 400 error, while other samples are // potentially written. Perhaps unify with fixed writeV2 implementation a bit. - if !ls.Has(labels.MetricName) || !ls.IsValid() { + if !ls.Has(labels.MetricName) || !ls.IsValid(model.NameValidationScheme) { level.Warn(h.logger).Log("msg", "Invalid metric names or labels", "got", ls.String()) samplesWithInvalidLabels++ continue @@ -380,7 +381,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * // Validate series labels early. // NOTE(bwplotka): While spec allows UTF-8, Prometheus Receiver may impose // specific limits and follow https://prometheus.io/docs/specs/remote_write_spec_2_0/#invalid-samples case. - if !ls.Has(labels.MetricName) || !ls.IsValid() { + if !ls.Has(labels.MetricName) || !ls.IsValid(model.NameValidationScheme) { badRequestErrs = append(badRequestErrs, fmt.Errorf("invalid metric name or labels, got %v", ls.String())) samplesWithInvalidLabels += len(ts.Samples) + len(ts.Histograms) continue From c586c15ae6a0fdb4d02abb9b850bcfedb44644e9 Mon Sep 17 00:00:00 2001 From: machine424 Date: Wed, 15 Nov 2023 11:41:12 +0100 Subject: [PATCH 45/83] fix(discovery): make discovery manager notify consumers of dropped targets for still defined jobs scrape/manager_test.go: add a test to check that the manager gets notified for targets that got dropped by discovery to reproduce: https://github.com/prometheus/prometheus/issues/12858#issuecomment-1732318102 Signed-off-by: machine424 --- discovery/manager.go | 41 ++-- discovery/manager_test.go | 5 +- scrape/manager.go | 2 +- scrape/manager_test.go | 413 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 446 insertions(+), 15 deletions(-) diff --git a/discovery/manager.go b/discovery/manager.go index 48bea85bb7..b7ebdb7e0b 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -212,9 +212,7 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error { m.metrics.FailedConfigs.Set(float64(failedCount)) var ( - wg sync.WaitGroup - // keep shows if we keep any providers after reload. - keep bool + wg sync.WaitGroup newProviders []*Provider ) for _, prov := range m.providers { @@ -228,13 +226,12 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error { continue } newProviders = append(newProviders, prov) - // refTargets keeps reference targets used to populate new subs' targets + // refTargets keeps reference targets used to populate new subs' targets as they should be the same. var refTargets map[string]*targetgroup.Group prov.mu.Lock() m.targetsMtx.Lock() for s := range prov.subs { - keep = true refTargets = m.targets[poolKey{s, prov.name}] // Remove obsolete subs' targets. if _, ok := prov.newSubs[s]; !ok { @@ -267,7 +264,9 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error { // While startProvider does pull the trigger, it may take some time to do so, therefore // we pull the trigger as soon as possible so that downstream managers can populate their state. // See https://github.com/prometheus/prometheus/pull/8639 for details. - if keep { + // This also helps making the downstream managers drop stale targets as soon as possible. + // See https://github.com/prometheus/prometheus/pull/13147 for details. + if len(m.providers) > 0 { select { case m.triggerSend <- struct{}{}: default: @@ -288,7 +287,9 @@ func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker D name: {}, }, } + m.mtx.Lock() m.providers = append(m.providers, p) + m.mtx.Unlock() m.startProvider(ctx, p) } @@ -403,19 +404,33 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group { tSets := map[string][]*targetgroup.Group{} n := map[string]int{} + m.mtx.RLock() m.targetsMtx.Lock() - defer m.targetsMtx.Unlock() - for pkey, tsets := range m.targets { - for _, tg := range tsets { - // Even if the target group 'tg' is empty we still need to send it to the 'Scrape manager' - // to signal that it needs to stop all scrape loops for this target set. - tSets[pkey.setName] = append(tSets[pkey.setName], tg) - n[pkey.setName] += len(tg.Targets) + for _, p := range m.providers { + p.mu.RLock() + for s := range p.subs { + // Send empty lists for subs without any targets to make sure old stale targets are dropped by consumers. + // See: https://github.com/prometheus/prometheus/issues/12858 for details. + if _, ok := tSets[s]; !ok { + tSets[s] = []*targetgroup.Group{} + n[s] = 0 + } + if tsets, ok := m.targets[poolKey{s, p.name}]; ok { + for _, tg := range tsets { + tSets[s] = append(tSets[s], tg) + n[s] += len(tg.Targets) + } + } } + p.mu.RUnlock() } + m.targetsMtx.Unlock() + m.mtx.RUnlock() + for setName, v := range n { m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v)) } + return tSets } diff --git a/discovery/manager_test.go b/discovery/manager_test.go index be07edbdb4..707c3931d7 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -939,11 +939,13 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) { discoveryManager.ApplyConfig(c) // Original targets should be present as soon as possible. + // An empty list should be sent for prometheus2 to drop any stale targets syncedTargets = <-discoveryManager.SyncCh() mu.Unlock() - require.Len(t, syncedTargets, 1) + require.Len(t, syncedTargets, 2) verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true) require.Len(t, syncedTargets["prometheus"], 1) + require.Empty(t, syncedTargets["prometheus2"]) // prometheus2 configs should be ready on second sync. syncedTargets = <-discoveryManager.SyncCh() @@ -1275,6 +1277,7 @@ func TestCoordinationWithReceiver(t *testing.T) { Targets: []model.LabelSet{{"__instance__": "1"}}, }, }, + "mock1": {}, }, }, { diff --git a/scrape/manager.go b/scrape/manager.go index 6d4e8707bb..e3dba5f0ee 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -142,7 +142,7 @@ func (m *Manager) UnregisterMetrics() { func (m *Manager) reloader() { reloadIntervalDuration := m.opts.DiscoveryReloadInterval - if reloadIntervalDuration < model.Duration(5*time.Second) { + if reloadIntervalDuration == model.Duration(0) { reloadIntervalDuration = model.Duration(5 * time.Second) } diff --git a/scrape/manager_test.go b/scrape/manager_test.go index c8d9bd6980..a2a3bba6ff 100644 --- a/scrape/manager_test.go +++ b/scrape/manager_test.go @@ -20,6 +20,7 @@ import ( "net/http/httptest" "net/url" "os" + "sort" "strconv" "sync" "testing" @@ -36,6 +37,7 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" + _ "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" @@ -869,3 +871,414 @@ func TestUnregisterMetrics(t *testing.T) { manager.UnregisterMetrics() } } + +func applyConfig( + t *testing.T, + config string, + scrapeManager *Manager, + discoveryManager *discovery.Manager, +) { + t.Helper() + + cfg := loadConfiguration(t, config) + require.NoError(t, scrapeManager.ApplyConfig(cfg)) + + c := make(map[string]discovery.Configs) + scfgs, err := cfg.GetScrapeConfigs() + require.NoError(t, err) + for _, v := range scfgs { + c[v.JobName] = v.ServiceDiscoveryConfigs + } + require.NoError(t, discoveryManager.ApplyConfig(c)) +} + +func runManagers(t *testing.T, ctx context.Context) (*discovery.Manager, *Manager) { + t.Helper() + + reg := prometheus.NewRegistry() + sdMetrics, err := discovery.RegisterSDMetrics(reg, discovery.NewRefreshMetrics(reg)) + require.NoError(t, err) + discoveryManager := discovery.NewManager( + ctx, + log.NewNopLogger(), + reg, + sdMetrics, + discovery.Updatert(100*time.Millisecond), + ) + scrapeManager, err := NewManager( + &Options{DiscoveryReloadInterval: model.Duration(100 * time.Millisecond)}, + nil, + nopAppendable{}, + prometheus.NewRegistry(), + ) + require.NoError(t, err) + go discoveryManager.Run() + go scrapeManager.Run(discoveryManager.SyncCh()) + return discoveryManager, scrapeManager +} + +func writeIntoFile(t *testing.T, content, filePattern string) *os.File { + t.Helper() + + file, err := os.CreateTemp("", filePattern) + require.NoError(t, err) + _, err = file.WriteString(content) + require.NoError(t, err) + return file +} + +func requireTargets( + t *testing.T, + scrapeManager *Manager, + jobName string, + waitToAppear bool, + expectedTargets []string, +) { + t.Helper() + + require.Eventually(t, func() bool { + targets, ok := scrapeManager.TargetsActive()[jobName] + if !ok { + if waitToAppear { + return false + } + t.Fatalf("job %s shouldn't be dropped", jobName) + } + if expectedTargets == nil { + return targets == nil + } + if len(targets) != len(expectedTargets) { + return false + } + sTargets := []string{} + for _, t := range targets { + sTargets = append(sTargets, t.String()) + } + sort.Strings(expectedTargets) + sort.Strings(sTargets) + for i, t := range sTargets { + if t != expectedTargets[i] { + return false + } + } + return true + }, 1*time.Second, 100*time.Millisecond) +} + +// TestTargetDisappearsAfterProviderRemoved makes sure that when a provider is dropped, (only) its targets are dropped. +func TestTargetDisappearsAfterProviderRemoved(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + myJob := "my-job" + myJobSDTargetURL := "my:9876" + myJobStaticTargetURL := "my:5432" + + sdFileContent := fmt.Sprintf(`[{"targets": ["%s"]}]`, myJobSDTargetURL) + sDFile := writeIntoFile(t, sdFileContent, "*targets.json") + + baseConfig := ` +scrape_configs: +- job_name: %s + static_configs: + - targets: ['%s'] + file_sd_configs: + - files: ['%s'] +` + + discoveryManager, scrapeManager := runManagers(t, ctx) + defer scrapeManager.Stop() + + applyConfig( + t, + fmt.Sprintf( + baseConfig, + myJob, + myJobStaticTargetURL, + sDFile.Name(), + ), + scrapeManager, + discoveryManager, + ) + // Make sure the jobs targets are taken into account + requireTargets( + t, + scrapeManager, + myJob, + true, + []string{ + fmt.Sprintf("http://%s/metrics", myJobSDTargetURL), + fmt.Sprintf("http://%s/metrics", myJobStaticTargetURL), + }, + ) + + // Apply a new config where a provider is removed + baseConfig = ` +scrape_configs: +- job_name: %s + static_configs: + - targets: ['%s'] +` + applyConfig( + t, + fmt.Sprintf( + baseConfig, + myJob, + myJobStaticTargetURL, + ), + scrapeManager, + discoveryManager, + ) + // Make sure the corresponding target was dropped + requireTargets( + t, + scrapeManager, + myJob, + false, + []string{ + fmt.Sprintf("http://%s/metrics", myJobStaticTargetURL), + }, + ) + + // Apply a new config with no providers + baseConfig = ` +scrape_configs: +- job_name: %s +` + applyConfig( + t, + fmt.Sprintf( + baseConfig, + myJob, + ), + scrapeManager, + discoveryManager, + ) + // Make sure the corresponding target was dropped + requireTargets( + t, + scrapeManager, + myJob, + false, + nil, + ) +} + +// TestOnlyProviderStaleTargetsAreDropped makes sure that when a job has only one provider with multiple targets +// and when the provider can no longer discover some of those targets, only those stale targets are dropped. +func TestOnlyProviderStaleTargetsAreDropped(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + jobName := "my-job" + jobTarget1URL := "foo:9876" + jobTarget2URL := "foo:5432" + + sdFile1Content := fmt.Sprintf(`[{"targets": ["%s"]}]`, jobTarget1URL) + sdFile2Content := fmt.Sprintf(`[{"targets": ["%s"]}]`, jobTarget2URL) + sDFile1 := writeIntoFile(t, sdFile1Content, "*targets.json") + sDFile2 := writeIntoFile(t, sdFile2Content, "*targets.json") + + baseConfig := ` +scrape_configs: +- job_name: %s + file_sd_configs: + - files: ['%s', '%s'] +` + discoveryManager, scrapeManager := runManagers(t, ctx) + defer scrapeManager.Stop() + + applyConfig( + t, + fmt.Sprintf(baseConfig, jobName, sDFile1.Name(), sDFile2.Name()), + scrapeManager, + discoveryManager, + ) + + // Make sure the job's targets are taken into account + requireTargets( + t, + scrapeManager, + jobName, + true, + []string{ + fmt.Sprintf("http://%s/metrics", jobTarget1URL), + fmt.Sprintf("http://%s/metrics", jobTarget2URL), + }, + ) + + // Apply the same config for the same job but with a non existing file to make the provider + // unable to discover some targets + applyConfig( + t, + fmt.Sprintf(baseConfig, jobName, sDFile1.Name(), "/idontexistdoi.json"), + scrapeManager, + discoveryManager, + ) + + // The old target should get dropped + requireTargets( + t, + scrapeManager, + jobName, + false, + []string{fmt.Sprintf("http://%s/metrics", jobTarget1URL)}, + ) +} + +// TestProviderStaleTargetsAreDropped makes sure that when a job has only one provider and when that provider +// should no longer discover targets, the targets of that provider are dropped. +// See: https://github.com/prometheus/prometheus/issues/12858 +func TestProviderStaleTargetsAreDropped(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + jobName := "my-job" + jobTargetURL := "foo:9876" + + sdFileContent := fmt.Sprintf(`[{"targets": ["%s"]}]`, jobTargetURL) + sDFile := writeIntoFile(t, sdFileContent, "*targets.json") + + baseConfig := ` +scrape_configs: +- job_name: %s + file_sd_configs: + - files: ['%s'] +` + discoveryManager, scrapeManager := runManagers(t, ctx) + defer scrapeManager.Stop() + + applyConfig( + t, + fmt.Sprintf(baseConfig, jobName, sDFile.Name()), + scrapeManager, + discoveryManager, + ) + + // Make sure the job's targets are taken into account + requireTargets( + t, + scrapeManager, + jobName, + true, + []string{ + fmt.Sprintf("http://%s/metrics", jobTargetURL), + }, + ) + + // Apply the same config for the same job but with a non existing file to make the provider + // unable to discover some targets + applyConfig( + t, + fmt.Sprintf(baseConfig, jobName, "/idontexistdoi.json"), + scrapeManager, + discoveryManager, + ) + + // The old target should get dropped + requireTargets( + t, + scrapeManager, + jobName, + false, + nil, + ) +} + +// TestOnlyStaleTargetsAreDropped makes sure that when a job has multiple providers, when aone of them should no, +// longer discover targets, only the stale targets of that provier are dropped. +func TestOnlyStaleTargetsAreDropped(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + myJob := "my-job" + myJobSDTargetURL := "my:9876" + myJobStaticTargetURL := "my:5432" + otherJob := "other-job" + otherJobTargetURL := "other:1234" + + sdFileContent := fmt.Sprintf(`[{"targets": ["%s"]}]`, myJobSDTargetURL) + sDFile := writeIntoFile(t, sdFileContent, "*targets.json") + + baseConfig := ` +scrape_configs: +- job_name: %s + static_configs: + - targets: ['%s'] + file_sd_configs: + - files: ['%s'] +- job_name: %s + static_configs: + - targets: ['%s'] +` + + discoveryManager, scrapeManager := runManagers(t, ctx) + defer scrapeManager.Stop() + + // Apply the initial config with an existing file + applyConfig( + t, + fmt.Sprintf( + baseConfig, + myJob, + myJobStaticTargetURL, + sDFile.Name(), + otherJob, + otherJobTargetURL, + ), + scrapeManager, + discoveryManager, + ) + // Make sure the jobs targets are taken into account + requireTargets( + t, + scrapeManager, + myJob, + true, + []string{ + fmt.Sprintf("http://%s/metrics", myJobSDTargetURL), + fmt.Sprintf("http://%s/metrics", myJobStaticTargetURL), + }, + ) + requireTargets( + t, + scrapeManager, + otherJob, + true, + []string{fmt.Sprintf("http://%s/metrics", otherJobTargetURL)}, + ) + + // Apply the same config with a non existing file for myJob + applyConfig( + t, + fmt.Sprintf( + baseConfig, + myJob, + myJobStaticTargetURL, + "/idontexistdoi.json", + otherJob, + otherJobTargetURL, + ), + scrapeManager, + discoveryManager, + ) + + // Only the SD target should get dropped for myJob + requireTargets( + t, + scrapeManager, + myJob, + false, + []string{ + fmt.Sprintf("http://%s/metrics", myJobStaticTargetURL), + }, + ) + // The otherJob should keep its target + requireTargets( + t, + scrapeManager, + otherJob, + false, + []string{fmt.Sprintf("http://%s/metrics", otherJobTargetURL)}, + ) +} From 5bd898863790bd94d9f762443aa21b84c3ac23b6 Mon Sep 17 00:00:00 2001 From: Arthur Silva Sens Date: Thu, 29 Aug 2024 04:40:17 -0300 Subject: [PATCH 46/83] Simplify 'TestManagerCTZeroIngestion' (#14756) Signed-off-by: Arthur Silva Sens --- scrape/manager_test.go | 59 +++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/scrape/manager_test.go b/scrape/manager_test.go index a2a3bba6ff..f260167b5e 100644 --- a/scrape/manager_test.go +++ b/scrape/manager_test.go @@ -724,8 +724,6 @@ func TestManagerCTZeroIngestion(t *testing.T) { name string counterSample *dto.Counter enableCTZeroIngestion bool - - expectedValues []float64 }{ { name: "disabled with CT on counter", @@ -734,7 +732,6 @@ func TestManagerCTZeroIngestion(t *testing.T) { // Timestamp does not matter as long as it exists in this test. CreatedTimestamp: timestamppb.Now(), }, - expectedValues: []float64{1.0}, }, { name: "enabled with CT on counter", @@ -744,7 +741,6 @@ func TestManagerCTZeroIngestion(t *testing.T) { CreatedTimestamp: timestamppb.Now(), }, enableCTZeroIngestion: true, - expectedValues: []float64{0.0, 1.0}, }, { name: "enabled without CT on counter", @@ -752,7 +748,6 @@ func TestManagerCTZeroIngestion(t *testing.T) { Value: proto.Float64(1.0), }, enableCTZeroIngestion: true, - expectedValues: []float64{1.0}, }, } { t.Run(tc.name, func(t *testing.T) { @@ -819,46 +814,44 @@ func TestManagerCTZeroIngestion(t *testing.T) { }) scrapeManager.reload() + var got []float64 // Wait for one scrape. ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) defer cancel() require.NoError(t, runutil.Retry(100*time.Millisecond, ctx.Done(), func() error { - if countFloatSamples(app, mName) != len(tc.expectedValues) { - return fmt.Errorf("expected %v samples", tc.expectedValues) + app.mtx.Lock() + defer app.mtx.Unlock() + + // Check if scrape happened and grab the relevant samples, they have to be there - or it's a bug + // and it's not worth waiting. + for _, f := range app.resultFloats { + if f.metric.Get(model.MetricNameLabel) == mName { + got = append(got, f.f) + } } - return nil + if len(app.resultFloats) > 0 { + return nil + } + return fmt.Errorf("expected some samples, got none") }), "after 1 minute") scrapeManager.Stop() - require.Equal(t, tc.expectedValues, getResultFloats(app, mName)) + // Check for zero samples, assuming we only injected always one sample. + // Did it contain CT to inject? If yes, was CT zero enabled? + if tc.counterSample.CreatedTimestamp.IsValid() && tc.enableCTZeroIngestion { + require.Len(t, got, 2) + require.Equal(t, 0.0, got[0]) + require.Equal(t, tc.counterSample.GetValue(), got[1]) + return + } + + // Expect only one, valid sample. + require.Len(t, got, 1) + require.Equal(t, tc.counterSample.GetValue(), got[0]) }) } } -func countFloatSamples(a *collectResultAppender, expectedMetricName string) (count int) { - a.mtx.Lock() - defer a.mtx.Unlock() - - for _, f := range a.resultFloats { - if f.metric.Get(model.MetricNameLabel) == expectedMetricName { - count++ - } - } - return count -} - -func getResultFloats(app *collectResultAppender, expectedMetricName string) (result []float64) { - app.mtx.Lock() - defer app.mtx.Unlock() - - for _, f := range app.resultFloats { - if f.metric.Get(model.MetricNameLabel) == expectedMetricName { - result = append(result, f.f) - } - } - return result -} - func TestUnregisterMetrics(t *testing.T) { reg := prometheus.NewRegistry() // Check that all metrics can be unregistered, allowing a second manager to be created. From ed2e18d7deb1abee96d68da3c03c6e0e67d7ed43 Mon Sep 17 00:00:00 2001 From: Julien Pivotto Date: Mon, 12 Aug 2024 15:32:21 +0200 Subject: [PATCH 47/83] Add documentation for custom headers. Signed-off-by: Julien Pivotto Signed-off-by: Julien --- docs/configuration/configuration.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index cd1794dac7..a42126cf22 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -70,7 +70,7 @@ global: # How frequently to evaluate rules. [ evaluation_interval: | default = 1m ] - + # Offset the rule evaluation timestamp of this particular group by the specified duration into the past to ensure the underlying metrics have been received. # Metric availability delays are more likely to occur when Prometheus is running as a remote write target, but can also occur when there's anomalies with scraping. [ rule_query_offset: | default = 0s ] @@ -307,6 +307,17 @@ tls_config: [ proxy_connect_header: [ : [, ...] ] ] +# Custom HTTP headers to be sent along with each request. +# Headers that are set by Prometheus itself can't be overwritten. +http_headers: + # Header name. + [ : + # Header values. + [ values: [, ...] ] + # Headers values. Hidden in configuration page. + [ secrets: [, ...] ] + # Files to read header values from. + [ files: [, ...] ] ] # List of Azure service discovery configurations. azure_sd_configs: From 838e49e7b8dfc05967bf4ddc95025550b80cd5cd Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 23 Aug 2024 16:57:43 +0100 Subject: [PATCH 48/83] [REFACTOR] TSDB: move chunkFromSeries from headChunkReader to head Signed-off-by: Bryan Boreham --- tsdb/head_read.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tsdb/head_read.go b/tsdb/head_read.go index 24d75f9c72..780eaa6690 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -376,12 +376,12 @@ func (h *headChunkReader) chunk(meta chunks.Meta, copyLastChunk bool) (chunkenc. s.Lock() defer s.Unlock() - return h.chunkFromSeries(s, cid, copyLastChunk) + return h.head.chunkFromSeries(s, cid, h.mint, h.maxt, h.isoState, copyLastChunk) } // Call with s locked. -func (h *headChunkReader) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID, copyLastChunk bool) (chunkenc.Chunk, int64, error) { - c, headChunk, isOpen, err := s.chunk(cid, h.head.chunkDiskMapper, &h.head.memChunkPool) +func (h *Head) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID, mint, maxt int64, isoState *isolationState, copyLastChunk bool) (chunkenc.Chunk, int64, error) { + c, headChunk, isOpen, err := s.chunk(cid, h.chunkDiskMapper, &h.memChunkPool) if err != nil { return nil, 0, err } @@ -390,12 +390,12 @@ func (h *headChunkReader) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID, // Set this to nil so that Go GC can collect it after it has been used. c.chunk = nil c.prev = nil - h.head.memChunkPool.Put(c) + h.memChunkPool.Put(c) } }() // This means that the chunk is outside the specified range. - if !c.OverlapsClosedInterval(h.mint, h.maxt) { + if !c.OverlapsClosedInterval(mint, maxt) { return nil, 0, storage.ErrNotFound } @@ -407,7 +407,7 @@ func (h *headChunkReader) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID, newB := make([]byte, len(b)) copy(newB, b) // TODO(codesome): Use bytes.Clone() when we upgrade to Go 1.20. // TODO(codesome): Put back in the pool (non-trivial). - chk, err = h.head.opts.ChunkPool.Get(s.headChunks.chunk.Encoding(), newB) + chk, err = h.opts.ChunkPool.Get(s.headChunks.chunk.Encoding(), newB) if err != nil { return nil, 0, err } @@ -417,7 +417,7 @@ func (h *headChunkReader) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID, Chunk: chk, s: s, cid: cid, - isoState: h.isoState, + isoState: isoState, }, maxTime, nil } @@ -538,7 +538,7 @@ func (s *memSeries) mergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMapper, case c.ref == 0: // This is an in-order head chunk. _, cid := chunks.HeadChunkRef(c.meta.Ref).Unpack() var err error - iterable, _, err = hr.chunkFromSeries(s, cid, false) + iterable, _, err = hr.head.chunkFromSeries(s, cid, hr.mint, hr.maxt, hr.isoState, false) if err != nil { return nil, fmt.Errorf("invalid head chunk: %w", err) } From cde42f30e9c91a23458661ee33d47f828fa63b43 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 23 Aug 2024 17:10:02 +0100 Subject: [PATCH 49/83] TSDB: streamline reading of overlapping head chunks `getOOOSeriesChunks` was already finding sets of overlapping chunks; we store those in a `multiMeta` struct so that `ChunkOrIterable` can reconstruct an `Iterable` easily and predictably. We no longer need a `MergeOOO` flag to indicate that this Meta should be merged with other ones; this is explicit in the `multiMeta` structure. We also no longer need `chunkMetaAndChunkDiskMapperRef`. Add `wrapOOOHeadChunk` to defeat `chunkenc.Pool` - chunks are reset during compaction, but if we wrap them (like `safeHeadChunk` was doing then this is skipped) . Signed-off-by: Bryan Boreham --- tsdb/chunks/chunks.go | 3 - tsdb/head_read.go | 101 ++++++-------------------- tsdb/ooo_head_read.go | 143 +++++++++++++++++++------------------ tsdb/ooo_head_read_test.go | 97 ++----------------------- 4 files changed, 98 insertions(+), 246 deletions(-) diff --git a/tsdb/chunks/chunks.go b/tsdb/chunks/chunks.go index 69201c6db7..ec0f6d4036 100644 --- a/tsdb/chunks/chunks.go +++ b/tsdb/chunks/chunks.go @@ -133,9 +133,6 @@ type Meta struct { // Time range the data covers. // When MaxTime == math.MaxInt64 the chunk is still open and being appended to. MinTime, MaxTime int64 - - // Flag to indicate that this meta needs merge with OOO data. - MergeOOO bool } // ChunkFromSamples requires all samples to have the same type. diff --git a/tsdb/head_read.go b/tsdb/head_read.go index 780eaa6690..d81ffbb6a0 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -366,7 +366,7 @@ func (h *headChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Ch // If copyLastChunk is true, then it makes a copy of the head chunk if asked for it. // Also returns max time of the chunk. func (h *headChunkReader) chunk(meta chunks.Meta, copyLastChunk bool) (chunkenc.Chunk, int64, error) { - sid, cid := chunks.HeadChunkRef(meta.Ref).Unpack() + sid, cid, isOOO := unpackHeadChunkRef(meta.Ref) s := h.head.series.getByID(sid) // This means that the series has been garbage collected. @@ -376,11 +376,20 @@ func (h *headChunkReader) chunk(meta chunks.Meta, copyLastChunk bool) (chunkenc. s.Lock() defer s.Unlock() - return h.head.chunkFromSeries(s, cid, h.mint, h.maxt, h.isoState, copyLastChunk) + return h.head.chunkFromSeries(s, cid, isOOO, h.mint, h.maxt, h.isoState, copyLastChunk) +} + +// Dumb thing to defeat chunk pool. +type wrapOOOHeadChunk struct { + chunkenc.Chunk } // Call with s locked. -func (h *Head) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID, mint, maxt int64, isoState *isolationState, copyLastChunk bool) (chunkenc.Chunk, int64, error) { +func (h *Head) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID, isOOO bool, mint, maxt int64, isoState *isolationState, copyLastChunk bool) (chunkenc.Chunk, int64, error) { + if isOOO { + chk, maxTime, err := s.oooChunk(cid, h.chunkDiskMapper, &h.memChunkPool) + return wrapOOOHeadChunk{chk}, maxTime, err + } c, headChunk, isOpen, err := s.chunk(cid, h.chunkDiskMapper, &h.memChunkPool) if err != nil { return nil, 0, err @@ -481,85 +490,19 @@ func (s *memSeries) chunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDi return elem, true, offset == 0, nil } -// mergedChunks return an iterable over all chunks that overlap the -// time window [mint,maxt], plus meta.Chunk if populated. -// If hr is non-nil then in-order chunks are included. -// This function is not thread safe unless the caller holds a lock. -// The caller must ensure that s.ooo is not nil. -func (s *memSeries) mergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMapper, hr *headChunkReader, mint, maxt int64, maxMmapRef chunks.ChunkDiskMapperRef) (chunkenc.Iterable, error) { - // We create a temporary slice of chunk metas to hold the information of all - // possible chunks that may overlap with the requested chunk. - tmpChks := make([]chunkMetaAndChunkDiskMapperRef, 0, len(s.ooo.oooMmappedChunks)+1) +// oooChunk returns the chunk for the HeadChunkID by m-mapping it from the disk. +// It never returns the head OOO chunk. +func (s *memSeries) oooChunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDiskMapper, memChunkPool *sync.Pool) (chunk chunkenc.Chunk, maxTime int64, err error) { + // ix represents the index of chunk in the s.ooo.oooMmappedChunks slice. The chunk id's are + // incremented by 1 when new chunk is created, hence (id - firstOOOChunkID) gives the slice index. + ix := int(id) - int(s.ooo.firstOOOChunkID) - for i, c := range s.ooo.oooMmappedChunks { - if maxMmapRef != 0 && c.ref > maxMmapRef { - break - } - if c.OverlapsClosedInterval(mint, maxt) { - tmpChks = append(tmpChks, chunkMetaAndChunkDiskMapperRef{ - meta: chunks.Meta{ - MinTime: c.minTime, - MaxTime: c.maxTime, - Ref: chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.oooHeadChunkID(i))), - }, - ref: c.ref, - }) - } - } - // Add in data copied from the head OOO chunk. - if meta.Chunk != nil { - tmpChks = append(tmpChks, chunkMetaAndChunkDiskMapperRef{meta: meta}) + if ix < 0 || ix >= len(s.ooo.oooMmappedChunks) { + return nil, 0, storage.ErrNotFound } - if hr != nil { // Include in-order chunks. - metas := appendSeriesChunks(s, max(meta.MinTime, mint), min(meta.MaxTime, maxt), nil) - for _, m := range metas { - tmpChks = append(tmpChks, chunkMetaAndChunkDiskMapperRef{ - meta: m, - ref: 0, // This tells the loop below it's an in-order head chunk. - }) - } - } - - // Next we want to sort all the collected chunks by min time so we can find - // those that overlap and stop when we know the rest don't. - slices.SortFunc(tmpChks, refLessByMinTimeAndMinRef) - - mc := &mergedOOOChunks{} - absoluteMax := int64(math.MinInt64) - for _, c := range tmpChks { - if c.meta.Ref != meta.Ref && (len(mc.chunkIterables) == 0 || c.meta.MinTime > absoluteMax) { - continue - } - var iterable chunkenc.Iterable - switch { - case c.meta.Chunk != nil: - iterable = c.meta.Chunk - case c.ref == 0: // This is an in-order head chunk. - _, cid := chunks.HeadChunkRef(c.meta.Ref).Unpack() - var err error - iterable, _, err = hr.head.chunkFromSeries(s, cid, hr.mint, hr.maxt, hr.isoState, false) - if err != nil { - return nil, fmt.Errorf("invalid head chunk: %w", err) - } - default: - chk, err := cdm.Chunk(c.ref) - if err != nil { - var cerr *chunks.CorruptionErr - if errors.As(err, &cerr) { - return nil, fmt.Errorf("invalid ooo mmapped chunk: %w", err) - } - return nil, err - } - iterable = chk - } - mc.chunkIterables = append(mc.chunkIterables, iterable) - if c.meta.MaxTime > absoluteMax { - absoluteMax = c.meta.MaxTime - } - } - - return mc, nil + chk, err := chunkDiskMapper.Chunk(s.ooo.oooMmappedChunks[ix].ref) + return chk, s.ooo.oooMmappedChunks[ix].maxTime, err } // safeHeadChunk makes sure that the chunk can be accessed without a race condition. diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go index a50decd623..7b58ec566f 100644 --- a/tsdb/ooo_head_read.go +++ b/tsdb/ooo_head_read.go @@ -16,6 +16,7 @@ package tsdb import ( "context" "errors" + "fmt" "math" "slices" @@ -91,11 +92,10 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap addChunk := func(minT, maxT int64, ref chunks.ChunkRef, chunk chunkenc.Chunk) { tmpChks = append(tmpChks, chunks.Meta{ - MinTime: minT, - MaxTime: maxT, - Ref: ref, - Chunk: chunk, - MergeOOO: true, + MinTime: minT, + MaxTime: maxT, + Ref: ref, + Chunk: chunk, }) } @@ -140,34 +140,39 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap // those that overlap. slices.SortFunc(tmpChks, lessByMinTimeAndMinRef) - // Next we want to iterate the sorted collected chunks and only return the - // chunks Meta the first chunk that overlaps with others. + // Next we want to iterate the sorted collected chunks and return composites for chunks that overlap with others. // Example chunks of a series: 5:(100, 200) 6:(500, 600) 7:(150, 250) 8:(550, 650) - // In the example 5 overlaps with 7 and 6 overlaps with 8 so we only want to - // return chunk Metas for chunk 5 and chunk 6e - *chks = append(*chks, tmpChks[0]) - maxTime := tmpChks[0].MaxTime // Tracks the maxTime of the previous "to be merged chunk". + // In the example 5 overlaps with 7 and 6 overlaps with 8 so we will return + // [5,7], [6,8]. + toBeMerged := tmpChks[0] for _, c := range tmpChks[1:] { - switch { - case c.MinTime > maxTime: - *chks = append(*chks, c) - maxTime = c.MaxTime - case c.MaxTime > maxTime: - maxTime = c.MaxTime - (*chks)[len(*chks)-1].MaxTime = c.MaxTime - fallthrough - default: - // If the head OOO chunk is part of an output chunk, copy the chunk pointer. - if c.Chunk != nil { - (*chks)[len(*chks)-1].Chunk = c.Chunk + if c.MinTime > toBeMerged.MaxTime { + // This chunk doesn't overlap. Send current toBeMerged to output and start a new one. + *chks = append(*chks, toBeMerged) + toBeMerged = c + } else { + // Merge this chunk with existing toBeMerged. + if mm, ok := toBeMerged.Chunk.(*multiMeta); ok { + mm.metas = append(mm.metas, c) + } else { + toBeMerged.Chunk = &multiMeta{metas: []chunks.Meta{toBeMerged, c}} + } + if toBeMerged.MaxTime < c.MaxTime { + toBeMerged.MaxTime = c.MaxTime } - (*chks)[len(*chks)-1].MergeOOO = (*chks)[len(*chks)-1].MergeOOO || c.MergeOOO } } + *chks = append(*chks, toBeMerged) return nil } +// Fake Chunk object to pass a set of Metas inside Meta.Chunk. +type multiMeta struct { + chunkenc.Chunk // We don't expect any of the methods to be called. + metas []chunks.Meta +} + // LabelValues needs to be overridden from the headIndexReader implementation // so we can return labels within either in-order range or ooo range. func (oh *HeadAndOOOIndexReader) LabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) { @@ -182,29 +187,6 @@ func (oh *HeadAndOOOIndexReader) LabelValues(ctx context.Context, name string, m return labelValuesWithMatchers(ctx, oh, name, matchers...) } -type chunkMetaAndChunkDiskMapperRef struct { - meta chunks.Meta - ref chunks.ChunkDiskMapperRef -} - -func refLessByMinTimeAndMinRef(a, b chunkMetaAndChunkDiskMapperRef) int { - switch { - case a.meta.MinTime < b.meta.MinTime: - return -1 - case a.meta.MinTime > b.meta.MinTime: - return 1 - } - - switch { - case a.meta.Ref < b.meta.Ref: - return -1 - case a.meta.Ref > b.meta.Ref: - return 1 - default: - return 0 - } -} - func lessByMinTimeAndMinRef(a, b chunks.Meta) int { switch { case a.MinTime < b.MinTime: @@ -243,36 +225,55 @@ func NewHeadAndOOOChunkReader(head *Head, mint, maxt int64, cr *headChunkReader, } func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error) { - sid, _, _ := unpackHeadChunkRef(meta.Ref) - if !meta.MergeOOO { - return cr.cr.ChunkOrIterable(meta) - } - - s := cr.head.series.getByID(sid) - // This means that the series has been garbage collected. - if s == nil { - return nil, nil, storage.ErrNotFound - } - - s.Lock() - if s.ooo == nil { // Must have s.ooo non-nil to call mergedChunks(). - s.Unlock() - return cr.cr.ChunkOrIterable(meta) - } - mc, err := s.mergedChunks(meta, cr.head.chunkDiskMapper, cr.cr, cr.mint, cr.maxt, cr.maxMmapRef) - s.Unlock() - - return nil, mc, err + c, it, _, err := cr.chunkOrIterable(meta, false) + return c, it, err } // ChunkOrIterableWithCopy implements ChunkReaderWithCopy. The special Copy // behaviour is only implemented for the in-order head chunk. func (cr *HeadAndOOOChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) { - if !meta.MergeOOO { - return cr.cr.ChunkOrIterableWithCopy(meta) + return cr.chunkOrIterable(meta, true) +} + +func (cr *HeadAndOOOChunkReader) chunkOrIterable(meta chunks.Meta, copyLastChunk bool) (chunkenc.Chunk, chunkenc.Iterable, int64, error) { + sid, cid, isOOO := unpackHeadChunkRef(meta.Ref) + s := cr.head.series.getByID(sid) + // This means that the series has been garbage collected. + if s == nil { + return nil, nil, 0, storage.ErrNotFound } - chk, iter, err := cr.ChunkOrIterable(meta) - return chk, iter, 0, err + var isoState *isolationState + if cr.cr != nil { + isoState = cr.cr.isoState + } + + s.Lock() + defer s.Unlock() + + if meta.Chunk == nil { + c, maxt, err := cr.head.chunkFromSeries(s, cid, isOOO, meta.MinTime, meta.MaxTime, isoState, copyLastChunk) + return c, nil, maxt, err + } + mm, ok := meta.Chunk.(*multiMeta) + if !ok { // Complete chunk was supplied. + return meta.Chunk, nil, meta.MaxTime, nil + } + // We have a composite meta: construct a composite iterable. + mc := &mergedOOOChunks{} + for _, m := range mm.metas { + switch { + case m.Chunk != nil: + mc.chunkIterables = append(mc.chunkIterables, m.Chunk) + default: + _, cid, isOOO := unpackHeadChunkRef(m.Ref) + iterable, _, err := cr.head.chunkFromSeries(s, cid, isOOO, m.MinTime, m.MaxTime, isoState, copyLastChunk) + if err != nil { + return nil, nil, 0, fmt.Errorf("invalid head chunk: %w", err) + } + mc.chunkIterables = append(mc.chunkIterables, iterable) + } + } + return nil, mc, meta.MaxTime, nil } func (cr *HeadAndOOOChunkReader) Close() error { diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go index e565933f83..545bc7b6eb 100644 --- a/tsdb/ooo_head_read_test.go +++ b/tsdb/ooo_head_read_test.go @@ -308,10 +308,9 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { var expChunks []chunks.Meta for _, e := range tc.expChunks { meta := chunks.Meta{ - Chunk: chunkenc.Chunk(nil), - MinTime: e.mint, - MaxTime: e.maxt, - MergeOOO: true, // Only OOO chunks are tested here, so we always request merge from OOO head. + Chunk: chunkenc.Chunk(nil), + MinTime: e.mint, + MaxTime: e.maxt, } // Ref to whatever Ref the chunk has, that we refer to by ID @@ -485,7 +484,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { cr := NewHeadAndOOOChunkReader(db.head, 0, 1000, nil, nil, 0) defer cr.Close() c, iterable, err := cr.ChunkOrIterable(chunks.Meta{ - Ref: 0x1800000, Chunk: chunkenc.Chunk(nil), MinTime: 100, MaxTime: 300, MergeOOO: true, + Ref: 0x1800000, Chunk: chunkenc.Chunk(nil), MinTime: 100, MaxTime: 300, }) require.Nil(t, iterable) require.Equal(t, err, fmt.Errorf("not found")) @@ -1030,94 +1029,6 @@ func testOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding( } } -// TestSortByMinTimeAndMinRef tests that the sort function for chunk metas does sort -// by chunk meta MinTime and in case of same references by the lower reference. -func TestSortByMinTimeAndMinRef(t *testing.T) { - tests := []struct { - name string - input []chunkMetaAndChunkDiskMapperRef - exp []chunkMetaAndChunkDiskMapperRef - }{ - { - name: "chunks are ordered by min time", - input: []chunkMetaAndChunkDiskMapperRef{ - { - meta: chunks.Meta{ - Ref: 0, - MinTime: 0, - }, - ref: chunks.ChunkDiskMapperRef(0), - }, - { - meta: chunks.Meta{ - Ref: 1, - MinTime: 1, - }, - ref: chunks.ChunkDiskMapperRef(1), - }, - }, - exp: []chunkMetaAndChunkDiskMapperRef{ - { - meta: chunks.Meta{ - Ref: 0, - MinTime: 0, - }, - ref: chunks.ChunkDiskMapperRef(0), - }, - { - meta: chunks.Meta{ - Ref: 1, - MinTime: 1, - }, - ref: chunks.ChunkDiskMapperRef(1), - }, - }, - }, - { - name: "if same mintime, lower reference goes first", - input: []chunkMetaAndChunkDiskMapperRef{ - { - meta: chunks.Meta{ - Ref: 10, - MinTime: 0, - }, - ref: chunks.ChunkDiskMapperRef(0), - }, - { - meta: chunks.Meta{ - Ref: 5, - MinTime: 0, - }, - ref: chunks.ChunkDiskMapperRef(1), - }, - }, - exp: []chunkMetaAndChunkDiskMapperRef{ - { - meta: chunks.Meta{ - Ref: 5, - MinTime: 0, - }, - ref: chunks.ChunkDiskMapperRef(1), - }, - { - meta: chunks.Meta{ - Ref: 10, - MinTime: 0, - }, - ref: chunks.ChunkDiskMapperRef(0), - }, - }, - }, - } - - for _, tc := range tests { - t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { - slices.SortFunc(tc.input, refLessByMinTimeAndMinRef) - require.Equal(t, tc.exp, tc.input) - }) - } -} - // TestSortMetaByMinTimeAndMinRef tests that the sort function for chunk metas does sort // by chunk meta MinTime and in case of same references by the lower reference. func TestSortMetaByMinTimeAndMinRef(t *testing.T) { From 1f38ae7bca8471e90a6fa227cd4cd567e25952c4 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 24 Aug 2024 16:50:15 +0100 Subject: [PATCH 50/83] [TESTS] TSDB: fix up OOO tests for new Series behaviour Signed-off-by: Bryan Boreham --- tsdb/ooo_head_read_test.go | 112 +++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 43 deletions(-) diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go index 545bc7b6eb..40e37043b8 100644 --- a/tsdb/ooo_head_read_test.go +++ b/tsdb/ooo_head_read_test.go @@ -39,6 +39,11 @@ type chunkInterval struct { maxt int64 } +type expChunk struct { + c chunkInterval + m []chunkInterval +} + // permutateChunkIntervals returns all possible orders of the given chunkIntervals. func permutateChunkIntervals(in []chunkInterval, out [][]chunkInterval, left, right int) [][]chunkInterval { if left == right { @@ -65,7 +70,7 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { queryMinT int64 queryMaxT int64 inputChunkIntervals []chunkInterval - expChunks []chunkInterval + expChunks []expChunk }{ { name: "Empty result and no error when head is empty", @@ -107,8 +112,8 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { // ts 0 100 150 200 250 300 350 400 450 500 550 600 650 700 // Query Interval [-----------------------------------------------------------] // Chunk 0: [---------------------------------------] - expChunks: []chunkInterval{ - {0, 150, 350}, + expChunks: []expChunk{ + {c: chunkInterval{0, 150, 350}}, }, }, { @@ -121,8 +126,8 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { // ts 0 100 150 200 250 300 350 400 450 500 550 600 650 700 // Query Interval: [---------------------------------------] // Chunk 0: [-----------------------------------------------------------] - expChunks: []chunkInterval{ - {0, 100, 400}, + expChunks: []expChunk{ + {c: chunkInterval{0, 100, 400}}, }, }, { @@ -142,9 +147,9 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { // Chunk 2: [-------------------] // Chunk 3: [-------------------] // Output Graphically [-----------------------------] [-----------------------------] - expChunks: []chunkInterval{ - {0, 100, 250}, - {1, 500, 650}, + expChunks: []expChunk{ + {c: chunkInterval{0, 100, 250}, m: []chunkInterval{{0, 100, 200}, {2, 150, 250}}}, + {c: chunkInterval{1, 500, 650}, m: []chunkInterval{{1, 500, 600}, {3, 550, 650}}}, }, }, { @@ -164,8 +169,8 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { // Chunk 2: [-------------------] // Chunk 3: [------------------] // Output Graphically [------------------------------------------------------------------------------] - expChunks: []chunkInterval{ - {0, 100, 500}, + expChunks: []expChunk{ + {c: chunkInterval{0, 100, 500}, m: []chunkInterval{{0, 100, 200}, {1, 200, 300}, {2, 300, 400}, {3, 400, 500}}}, }, }, { @@ -185,11 +190,11 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { // Chunk 2: [------------------] // Chunk 3: [------------------] // Output Graphically [------------------][------------------][------------------][------------------] - expChunks: []chunkInterval{ - {0, 100, 199}, - {1, 200, 299}, - {2, 300, 399}, - {3, 400, 499}, + expChunks: []expChunk{ + {c: chunkInterval{0, 100, 199}}, + {c: chunkInterval{1, 200, 299}}, + {c: chunkInterval{2, 300, 399}}, + {c: chunkInterval{3, 400, 499}}, }, }, { @@ -209,8 +214,8 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { // Chunk 2: [------------------] // Chunk 3: [------------------] // Output Graphically [-----------------------------------------------] - expChunks: []chunkInterval{ - {0, 100, 350}, + expChunks: []expChunk{ + {c: chunkInterval{0, 100, 350}, m: []chunkInterval{{0, 100, 200}, {1, 150, 300}, {2, 250, 350}}}, }, }, { @@ -228,8 +233,8 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { // Chunk 1: [-----------------------------] // Chunk 2: [------------------------------] // Output Graphically [-----------------------------------------------------------------------------------------] - expChunks: []chunkInterval{ - {1, 0, 500}, + expChunks: []expChunk{ + {c: chunkInterval{1, 0, 500}, m: []chunkInterval{{1, 0, 200}, {2, 150, 300}, {0, 250, 500}}}, }, }, { @@ -251,9 +256,9 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { // Chunk 3: [-------------------] // Chunk 4: [---------------------------------------] // Output Graphically [---------------------------------------] [------------------------------------------------] - expChunks: []chunkInterval{ - {0, 100, 300}, - {4, 600, 850}, + expChunks: []expChunk{ + {c: chunkInterval{0, 100, 300}, m: []chunkInterval{{0, 100, 300}, {2, 150, 250}}}, + {c: chunkInterval{4, 600, 850}, m: []chunkInterval{{4, 600, 800}, {3, 650, 750}, {1, 770, 850}}}, }, }, { @@ -271,10 +276,10 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { // Chunk 1: [----------] // Chunk 2: [--------] // Output Graphically [-------] [--------] [----------] - expChunks: []chunkInterval{ - {0, 100, 150}, - {1, 300, 350}, - {2, 200, 250}, + expChunks: []expChunk{ + {c: chunkInterval{0, 100, 150}}, + {c: chunkInterval{2, 200, 250}}, + {c: chunkInterval{1, 300, 350}}, }, }, } @@ -305,24 +310,38 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { s1.ooo = &memSeriesOOOFields{} // define our expected chunks, by looking at the expected ChunkIntervals and setting... + // Ref to whatever Ref the chunk has, that we refer to by ID + findID := func(id int) chunks.ChunkRef { + for ref, c := range intervals { + if c.ID == id { + return chunks.ChunkRef(chunks.NewHeadChunkRef(chunks.HeadSeriesRef(s1ID), s1.oooHeadChunkID(ref))) + } + } + return 0 + } var expChunks []chunks.Meta for _, e := range tc.expChunks { - meta := chunks.Meta{ - Chunk: chunkenc.Chunk(nil), - MinTime: e.mint, - MaxTime: e.maxt, - } - - // Ref to whatever Ref the chunk has, that we refer to by ID - for ref, c := range intervals { - if c.ID == e.ID { - meta.Ref = chunks.ChunkRef(chunks.NewHeadChunkRef(chunks.HeadSeriesRef(s1ID), s1.oooHeadChunkID(ref))) - break + var chunk chunkenc.Chunk + if len(e.m) > 0 { + mm := &multiMeta{} + for _, x := range e.m { + meta := chunks.Meta{ + MinTime: x.mint, + MaxTime: x.maxt, + Ref: findID(x.ID), + } + mm.metas = append(mm.metas, meta) } + chunk = mm + } + meta := chunks.Meta{ + Chunk: chunk, + MinTime: e.c.mint, + MaxTime: e.c.maxt, + Ref: findID(e.c.ID), } expChunks = append(expChunks, meta) } - slices.SortFunc(expChunks, lessByMinTimeAndMinRef) // We always want the chunks to come back sorted by minTime asc. if headChunk && len(intervals) > 0 { // Put the last interval in the head chunk @@ -497,6 +516,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { queryMaxT int64 firstInOrderSampleAt int64 inputSamples []testValue + expSingleChunks bool expChunkError bool expChunksSamples []chunks.SampleSlice }{ @@ -509,7 +529,8 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { {Ts: minutes(30), V: 0}, {Ts: minutes(40), V: 0}, }, - expChunkError: false, + expChunkError: false, + expSingleChunks: true, // ts (in minutes) 0 10 20 30 40 50 60 70 80 90 100 // Query Interval [------------------------------------------------------------------------------------------] // Chunk 0: Current Head [--------] (With 2 samples) @@ -689,7 +710,8 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { {Ts: minutes(40), V: 3}, {Ts: minutes(42), V: 3}, }, - expChunkError: false, + expChunkError: false, + expSingleChunks: true, // ts (in minutes) 0 10 20 30 40 50 60 70 80 90 100 // Query Interval [------------------------------------------------------------------------------------------] // Chunk 0 [-------] @@ -844,9 +866,13 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { for i := 0; i < len(chks); i++ { c, iterable, err := cr.ChunkOrIterable(chks[i]) require.NoError(t, err) - require.Nil(t, c) - - it := iterable.Iterator(nil) + var it chunkenc.Iterator + if tc.expSingleChunks { + it = c.Iterator(nil) + } else { + require.Nil(t, c) + it = iterable.Iterator(nil) + } resultSamples, err := storage.ExpandSamples(it, nil) require.NoError(t, err) requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, true) From f90c7a11d16da9244d6acd971513f409211a5748 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Thu, 29 Aug 2024 11:57:31 +0100 Subject: [PATCH 51/83] [REFACTOR] OTLP translator: simplify time conversion We don't need multiple levels of abstraction to convert nanoseconds to milliseconds. We do benefit from tests, however. Signed-off-by: Bryan Boreham --- .../prometheusremotewrite/helper.go | 3 +-- .../prometheusremotewrite/helper_test.go | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index f2d7ecd4e3..67cf28119d 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -24,7 +24,6 @@ import ( "slices" "sort" "strconv" - "time" "unicode/utf8" "github.com/cespare/xxhash/v2" @@ -594,5 +593,5 @@ func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timesta // convertTimeStamp converts OTLP timestamp in ns to timestamp in ms func convertTimeStamp(timestamp pcommon.Timestamp) int64 { - return timestamp.AsTime().UnixNano() / (int64(time.Millisecond) / int64(time.Nanosecond)) + return int64(timestamp) / 1_000_000 } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go index c4dd781ae6..729d99b8a8 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go @@ -14,6 +14,7 @@ package prometheusremotewrite import ( "testing" + "time" "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/pdata/pcommon" @@ -159,3 +160,21 @@ func TestCreateAttributes(t *testing.T) { }) } } + +func Test_convertTimeStamp(t *testing.T) { + tests := []struct { + name string + arg pcommon.Timestamp + want int64 + }{ + {"zero", 0, 0}, + {"1ms", 1_000_000, 1}, + {"1s", pcommon.Timestamp(time.Unix(1, 0).UnixNano()), 1000}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := convertTimeStamp(tt.arg) + assert.Equal(t, tt.want, got) + }) + } +} From e9e3d64b7c9df354d46c73da464b56a7c4cb7233 Mon Sep 17 00:00:00 2001 From: Jorge Creixell Date: Thu, 29 Aug 2024 15:50:39 +0200 Subject: [PATCH 52/83] PromQL engine: Delay deletion of __name__ label to the end of the query evaluation (#14477) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PromQL engine: Delay deletion of __name__ label to the end of the query evaluation - This change allows optionally preserving the `__name__` label via the `label_replace` and `label_join` functions, and helps prevent the dreaded "vector cannot contain metrics with the same labelset" error. - The implementation extends the `Series` and `Sample` structs with a boolean flag indicating whether the `__name__` label should be deleted at the end of the query evaluation. - The `label_replace` and `label_join` functions can still access the value of the `__name__` label, even if it has been previously marked for deletion. If `__name__` is used as target label, it won't be dropped at the end of the query evaluation. - Fixes https://github.com/prometheus/prometheus/issues/11397 - See https://github.com/jcreixell/prometheus/pull/2 for previous discussion, including the decision to create this PR and benchmark it before considering other alternatives (like refactoring `labels.Labels`). - See https://github.com/jcreixell/prometheus/pull/1 for an alternative implementation using a special label instead of boolean flags. - Note: a feature flag `promql-delayed-name-removal` has been added as it changes the behavior of some "weird" queries (see https://github.com/prometheus/prometheus/issues/11397#issuecomment-1451998792) Example (this always fails, as `__name__` is being dropped by `count_over_time`): ``` count_over_time({__name__!=""}[1m]) => Error executing query: vector cannot contain metrics with the same labelset ``` Before: ``` label_replace(count_over_time({__name__!=""}[1m]), "__name__", "count_$1", "__name__", "(.+)") => Error executing query: vector cannot contain metrics with the same labelset ``` After: ``` label_replace(count_over_time({__name__!=""}[1m]), "__name__", "count_$1", "__name__", "(.+)") => count_go_gc_cycles_automatic_gc_cycles_total{instance="localhost:9090", job="prometheus"} 1 count_go_gc_cycles_forced_gc_cycles_total{instance="localhost:9090", job="prometheus"} 1 ... ``` Signed-off-by: Jorge Creixell --------- Signed-off-by: Jorge Creixell Signed-off-by: Björn Rabenstein --- cmd/prometheus/main.go | 14 +- docs/command-line/prometheus.md | 2 +- docs/feature_flags.md | 8 ++ promql/engine.go | 124 +++++++++++++----- promql/engine_test.go | 29 ++-- promql/functions.go | 120 +++++++++++++---- promql/promqltest/test.go | 2 + .../testdata/name_label_dropping.test | 84 ++++++++++++ promql/value.go | 6 + 9 files changed, 309 insertions(+), 80 deletions(-) create mode 100644 promql/promqltest/testdata/name_label_dropping.test diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index c402fbcb80..65ffd7de5a 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -169,6 +169,8 @@ type flagConfig struct { corsRegexString string promlogConfig promlog.Config + + promqlEnableDelayedNameRemoval bool } // setFeatureListOptions sets the corresponding options from the featureList. @@ -238,6 +240,9 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error { case "delayed-compaction": c.tsdb.EnableDelayedCompaction = true level.Info(logger).Log("msg", "Experimental delayed compaction is enabled.") + case "promql-delayed-name-removal": + c.promqlEnableDelayedNameRemoval = true + level.Info(logger).Log("msg", "Experimental PromQL delayed name removal enabled.") case "utf8-names": model.NameValidationScheme = model.UTF8Validation level.Info(logger).Log("msg", "Experimental UTF-8 support enabled") @@ -487,7 +492,7 @@ func main() { a.Flag("scrape.name-escaping-scheme", `Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots".`).Default(scrape.DefaultNameEscapingScheme.String()).StringVar(&cfg.nameEscapingScheme) - a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). + a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). Default("").StringsVar(&cfg.featureList) promlogflag.AddFlags(a, &cfg.promlogConfig) @@ -799,9 +804,10 @@ func main() { NoStepSubqueryIntervalFn: noStepSubqueryInterval.Get, // EnableAtModifier and EnableNegativeOffset have to be // always on for regular PromQL as of Prometheus v2.33. - EnableAtModifier: true, - EnableNegativeOffset: true, - EnablePerStepStats: cfg.enablePerStepStats, + EnableAtModifier: true, + EnableNegativeOffset: true, + EnablePerStepStats: cfg.enablePerStepStats, + EnableDelayedNameRemoval: cfg.promqlEnableDelayedNameRemoval, } queryEngine = promql.NewEngine(opts) diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index f65c260c40..7d9e5a3c80 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -57,7 +57,7 @@ The Prometheus monitoring server | --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` | | --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` | | --scrape.name-escaping-scheme | Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots". | `values` | -| --enable-feature ... | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +| --enable-feature ... | Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | | --log.level | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` | | --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` | diff --git a/docs/feature_flags.md b/docs/feature_flags.md index 0a908bb91d..7b07a04d0e 100644 --- a/docs/feature_flags.md +++ b/docs/feature_flags.md @@ -250,6 +250,14 @@ Note that during this delay, the Head continues its usual operations, which incl Despite the delay in compaction, the blocks produced are time-aligned in the same manner as they would be if the delay was not in place. +## Delay __name__ label removal for PromQL engine + +`--enable-feature=promql-delayed-name-removal` + +When enabled, Prometheus will change the way in which the `__name__` label is removed from PromQL query results (for functions and expressions for which this is necessary). Specifically, it will delay the removal to the last step of the query evaluation, instead of every time an expression or function creating derived metrics is evaluated. + +This allows optionally preserving the `__name__` label via the `label_replace` and `label_join` functions, and helps prevent the "vector cannot contain metrics with the same labelset" error, which can happen when applying a regex-matcher to the `__name__` label. + ## UTF-8 Name Support `--enable-feature=utf8-names` diff --git a/promql/engine.go b/promql/engine.go index 60c8e06394..b54ce2d6dc 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -313,6 +313,11 @@ type EngineOpts struct { // EnablePerStepStats if true allows for per-step stats to be computed on request. Disabled otherwise. EnablePerStepStats bool + + // EnableDelayedNameRemoval delays the removal of the __name__ label to the last step of the query evaluation. + // This is useful in certain scenarios where the __name__ label must be preserved or where applying a + // regex-matcher to the __name__ label may otherwise lead to duplicate labelset errors. + EnableDelayedNameRemoval bool } // Engine handles the lifetime of queries from beginning to end. @@ -330,6 +335,7 @@ type Engine struct { enableAtModifier bool enableNegativeOffset bool enablePerStepStats bool + enableDelayedNameRemoval bool } // NewEngine returns a new engine. @@ -420,6 +426,7 @@ func NewEngine(opts EngineOpts) *Engine { enableAtModifier: opts.EnableAtModifier, enableNegativeOffset: opts.EnableNegativeOffset, enablePerStepStats: opts.EnablePerStepStats, + enableDelayedNameRemoval: opts.EnableDelayedNameRemoval, } } @@ -712,6 +719,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval lookbackDelta: s.LookbackDelta, samplesStats: query.sampleStats, noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn, + enableDelayedNameRemoval: ng.enableDelayedNameRemoval, } query.sampleStats.InitStepTracking(start, start, 1) @@ -743,9 +751,9 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval // Point might have a different timestamp, force it to the evaluation // timestamp as that is when we ran the evaluation. if len(s.Histograms) > 0 { - vector[i] = Sample{Metric: s.Metric, H: s.Histograms[0].H, T: start} + vector[i] = Sample{Metric: s.Metric, H: s.Histograms[0].H, T: start, DropName: s.DropName} } else { - vector[i] = Sample{Metric: s.Metric, F: s.Floats[0].F, T: start} + vector[i] = Sample{Metric: s.Metric, F: s.Floats[0].F, T: start, DropName: s.DropName} } } return vector, warnings, nil @@ -770,6 +778,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval lookbackDelta: s.LookbackDelta, samplesStats: query.sampleStats, noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn, + enableDelayedNameRemoval: ng.enableDelayedNameRemoval, } query.sampleStats.InitStepTracking(evaluator.startTimestamp, evaluator.endTimestamp, evaluator.interval) val, warnings, err := evaluator.Eval(s.Expr) @@ -1032,6 +1041,7 @@ type evaluator struct { lookbackDelta time.Duration samplesStats *stats.QuerySamples noStepSubqueryIntervalFn func(rangeMillis int64) int64 + enableDelayedNameRemoval bool } // errorf causes a panic with the input formatted into an error. @@ -1073,6 +1083,9 @@ func (ev *evaluator) Eval(expr parser.Expr) (v parser.Value, ws annotations.Anno defer ev.recover(expr, &ws, &err) v, ws = ev.eval(expr) + if ev.enableDelayedNameRemoval { + ev.cleanupMetricLabels(v) + } return v, ws, nil } @@ -1101,6 +1114,9 @@ type EvalNodeHelper struct { rightSigs map[string]Sample matchedSigs map[string]map[uint64]struct{} resultMetric map[string]labels.Labels + + // Additional options for the evaluation. + enableDelayedNameRemoval bool } func (enh *EvalNodeHelper) resetBuilder(lbls labels.Labels) { @@ -1150,7 +1166,7 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper) biggestLen = len(matrixes[i]) } } - enh := &EvalNodeHelper{Out: make(Vector, 0, biggestLen)} + enh := &EvalNodeHelper{Out: make(Vector, 0, biggestLen), enableDelayedNameRemoval: ev.enableDelayedNameRemoval} type seriesAndTimestamp struct { Series ts int64 @@ -1196,12 +1212,12 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper) for si, series := range matrixes[i] { switch { case len(series.Floats) > 0 && series.Floats[0].T == ts: - vectors[i] = append(vectors[i], Sample{Metric: series.Metric, F: series.Floats[0].F, T: ts}) + vectors[i] = append(vectors[i], Sample{Metric: series.Metric, F: series.Floats[0].F, T: ts, DropName: series.DropName}) // Move input vectors forward so we don't have to re-scan the same // past points at the next step. matrixes[i][si].Floats = series.Floats[1:] case len(series.Histograms) > 0 && series.Histograms[0].T == ts: - vectors[i] = append(vectors[i], Sample{Metric: series.Metric, H: series.Histograms[0].H, T: ts}) + vectors[i] = append(vectors[i], Sample{Metric: series.Metric, H: series.Histograms[0].H, T: ts, DropName: series.DropName}) matrixes[i][si].Histograms = series.Histograms[1:] default: continue @@ -1240,15 +1256,15 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper) // If this could be an instant query, shortcut so as not to change sort order. if ev.endTimestamp == ev.startTimestamp { - if result.ContainsSameLabelset() { + if !ev.enableDelayedNameRemoval && result.ContainsSameLabelset() { ev.errorf("vector cannot contain metrics with the same labelset") } mat := make(Matrix, len(result)) for i, s := range result { if s.H == nil { - mat[i] = Series{Metric: s.Metric, Floats: []FPoint{{T: ts, F: s.F}}} + mat[i] = Series{Metric: s.Metric, Floats: []FPoint{{T: ts, F: s.F}}, DropName: s.DropName} } else { - mat[i] = Series{Metric: s.Metric, Histograms: []HPoint{{T: ts, H: s.H}}} + mat[i] = Series{Metric: s.Metric, Histograms: []HPoint{{T: ts, H: s.H}}, DropName: s.DropName} } } ev.currentSamples = originalNumSamples + mat.TotalSamples() @@ -1266,7 +1282,7 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper) } ss.ts = ts } else { - ss = seriesAndTimestamp{Series{Metric: sample.Metric}, ts} + ss = seriesAndTimestamp{Series{Metric: sample.Metric, DropName: sample.DropName}, ts} } addToSeries(&ss.Series, enh.Ts, sample.F, sample.H, numSteps) seriess[h] = ss @@ -1302,7 +1318,7 @@ func (ev *evaluator) rangeEvalAgg(aggExpr *parser.AggregateExpr, sortedGrouping var warnings annotations.Annotations - enh := &EvalNodeHelper{} + enh := &EvalNodeHelper{enableDelayedNameRemoval: ev.enableDelayedNameRemoval} tempNumSamples := ev.currentSamples // Create a mapping from input series to output groups. @@ -1611,10 +1627,17 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio var prevSS *Series inMatrix := make(Matrix, 1) inArgs[matrixArgIndex] = inMatrix - enh := &EvalNodeHelper{Out: make(Vector, 0, 1)} + enh := &EvalNodeHelper{Out: make(Vector, 0, 1), enableDelayedNameRemoval: ev.enableDelayedNameRemoval} // Process all the calls for one time series at a time. it := storage.NewBuffer(selRange) var chkIter chunkenc.Iterator + + // The last_over_time function acts like offset; thus, it + // should keep the metric name. For all the other range + // vector functions, the only change needed is to drop the + // metric name in the output. + dropName := e.Func.Name != "last_over_time" + for i, s := range selVS.Series { if err := contextDone(ev.ctx, "expression evaluation"); err != nil { ev.error(err) @@ -1629,15 +1652,12 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio chkIter = s.Iterator(chkIter) it.Reset(chkIter) metric := selVS.Series[i].Labels() - // The last_over_time function acts like offset; thus, it - // should keep the metric name. For all the other range - // vector functions, the only change needed is to drop the - // metric name in the output. - if e.Func.Name != "last_over_time" { + if !ev.enableDelayedNameRemoval && dropName { metric = metric.DropMetricName() } ss := Series{ - Metric: metric, + Metric: metric, + DropName: dropName, } inMatrix[0].Metric = selVS.Series[i].Labels() for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval { @@ -1752,16 +1772,16 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio return Matrix{ Series{ - Metric: createLabelsForAbsentFunction(e.Args[0]), - Floats: newp, + Metric: createLabelsForAbsentFunction(e.Args[0]), + Floats: newp, + DropName: dropName, }, }, warnings } - if mat.ContainsSameLabelset() { + if !ev.enableDelayedNameRemoval && mat.ContainsSameLabelset() { ev.errorf("vector cannot contain metrics with the same labelset") } - return mat, warnings case *parser.ParenExpr: @@ -1772,12 +1792,15 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio mat := val.(Matrix) if e.Op == parser.SUB { for i := range mat { - mat[i].Metric = mat[i].Metric.DropMetricName() + if !ev.enableDelayedNameRemoval { + mat[i].Metric = mat[i].Metric.DropMetricName() + } + mat[i].DropName = true for j := range mat[i].Floats { mat[i].Floats[j].F = -mat[i].Floats[j].F } } - if mat.ContainsSameLabelset() { + if !ev.enableDelayedNameRemoval && mat.ContainsSameLabelset() { ev.errorf("vector cannot contain metrics with the same labelset") } } @@ -1913,6 +1936,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio lookbackDelta: ev.lookbackDelta, samplesStats: ev.samplesStats.NewChild(), noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn, + enableDelayedNameRemoval: ev.enableDelayedNameRemoval, } if e.Step != 0 { @@ -1957,6 +1981,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio lookbackDelta: ev.lookbackDelta, samplesStats: ev.samplesStats.NewChild(), noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn, + enableDelayedNameRemoval: ev.enableDelayedNameRemoval, } res, ws := newEv.eval(e.Expr) ev.currentSamples = newEv.currentSamples @@ -2553,7 +2578,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * continue } metric := resultMetric(ls.Metric, rs.Metric, op, matching, enh) - if returnBool { + if !ev.enableDelayedNameRemoval && returnBool { metric = metric.DropMetricName() } insertedSigs, exists := matchedSigs[sig] @@ -2578,9 +2603,10 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * } enh.Out = append(enh.Out, Sample{ - Metric: metric, - F: floatValue, - H: histogramValue, + Metric: metric, + F: floatValue, + H: histogramValue, + DropName: returnBool, }) } return enh.Out, lastErr @@ -2680,7 +2706,10 @@ func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scala lhsSample.F = float lhsSample.H = histogram if shouldDropMetricName(op) || returnBool { - lhsSample.Metric = lhsSample.Metric.DropMetricName() + if !ev.enableDelayedNameRemoval { + lhsSample.Metric = lhsSample.Metric.DropMetricName() + } + lhsSample.DropName = true } enh.Out = append(enh.Out, lhsSample) } @@ -3019,6 +3048,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix ss := &outputMatrix[ri] addToSeries(ss, enh.Ts, aggr.floatValue, aggr.histogramValue, numSteps) + ss.DropName = inputMatrix[ri].DropName } return annos @@ -3045,7 +3075,7 @@ seriesLoop: if !ok { continue } - s = Sample{Metric: inputMatrix[si].Metric, F: f} + s = Sample{Metric: inputMatrix[si].Metric, F: f, DropName: inputMatrix[si].DropName} group := &groups[seriesToResult[si]] // Initialize this group if it's the first time we've seen it. @@ -3129,16 +3159,16 @@ seriesLoop: mat = make(Matrix, 0, len(groups)) } - add := func(lbls labels.Labels, f float64) { + add := func(lbls labels.Labels, f float64, dropName bool) { // If this could be an instant query, add directly to the matrix so the result is in consistent order. if ev.endTimestamp == ev.startTimestamp { - mat = append(mat, Series{Metric: lbls, Floats: []FPoint{{T: enh.Ts, F: f}}}) + mat = append(mat, Series{Metric: lbls, Floats: []FPoint{{T: enh.Ts, F: f}}, DropName: dropName}) } else { // Otherwise the results are added into seriess elements. hash := lbls.Hash() ss, ok := seriess[hash] if !ok { - ss = Series{Metric: lbls} + ss = Series{Metric: lbls, DropName: dropName} } addToSeries(&ss, enh.Ts, f, nil, numSteps) seriess[hash] = ss @@ -3155,7 +3185,7 @@ seriesLoop: sort.Sort(sort.Reverse(aggr.heap)) } for _, v := range aggr.heap { - add(v.Metric, v.F) + add(v.Metric, v.F, v.DropName) } case parser.BOTTOMK: @@ -3164,12 +3194,12 @@ seriesLoop: sort.Sort(sort.Reverse((*vectorByReverseValueHeap)(&aggr.heap))) } for _, v := range aggr.heap { - add(v.Metric, v.F) + add(v.Metric, v.F, v.DropName) } case parser.LIMITK, parser.LIMIT_RATIO: for _, v := range aggr.heap { - add(v.Metric, v.F) + add(v.Metric, v.F, v.DropName) } } } @@ -3221,6 +3251,30 @@ func (ev *evaluator) aggregationCountValues(e *parser.AggregateExpr, grouping [] return enh.Out, nil } +func (ev *evaluator) cleanupMetricLabels(v parser.Value) { + if v.Type() == parser.ValueTypeMatrix { + mat := v.(Matrix) + for i := range mat { + if mat[i].DropName { + mat[i].Metric = mat[i].Metric.DropMetricName() + } + } + if mat.ContainsSameLabelset() { + ev.errorf("vector cannot contain metrics with the same labelset") + } + } else if v.Type() == parser.ValueTypeVector { + vec := v.(Vector) + for i := range vec { + if vec[i].DropName { + vec[i].Metric = vec[i].Metric.DropMetricName() + } + } + if vec.ContainsSameLabelset() { + ev.errorf("vector cannot contain metrics with the same labelset") + } + } +} + func addToSeries(ss *Series, ts int64, f float64, h *histogram.FloatHistogram, numSteps int) { if h == nil { if ss.Floats == nil { diff --git a/promql/engine_test.go b/promql/engine_test.go index 37cba975b3..3e159782a3 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -1714,7 +1714,8 @@ load 1ms {F: 3600, T: 6 * 60 * 1000}, {F: 3600, T: 7 * 60 * 1000}, }, - Metric: labels.EmptyLabels(), + Metric: labels.EmptyLabels(), + DropName: true, }, }, }, @@ -1930,20 +1931,24 @@ func TestSubquerySelector(t *testing.T) { nil, promql.Matrix{ promql.Series{ - Floats: []promql.FPoint{{F: 3, T: 7985000}, {F: 3, T: 7990000}, {F: 3, T: 7995000}, {F: 3, T: 8000000}}, - Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "canary"), + Floats: []promql.FPoint{{F: 3, T: 7985000}, {F: 3, T: 7990000}, {F: 3, T: 7995000}, {F: 3, T: 8000000}}, + Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "canary"), + DropName: true, }, promql.Series{ - Floats: []promql.FPoint{{F: 4, T: 7985000}, {F: 4, T: 7990000}, {F: 4, T: 7995000}, {F: 4, T: 8000000}}, - Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "canary"), + Floats: []promql.FPoint{{F: 4, T: 7985000}, {F: 4, T: 7990000}, {F: 4, T: 7995000}, {F: 4, T: 8000000}}, + Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "canary"), + DropName: true, }, promql.Series{ - Floats: []promql.FPoint{{F: 1, T: 7985000}, {F: 1, T: 7990000}, {F: 1, T: 7995000}, {F: 1, T: 8000000}}, - Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "production"), + Floats: []promql.FPoint{{F: 1, T: 7985000}, {F: 1, T: 7990000}, {F: 1, T: 7995000}, {F: 1, T: 8000000}}, + Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "production"), + DropName: true, }, promql.Series{ - Floats: []promql.FPoint{{F: 2, T: 7985000}, {F: 2, T: 7990000}, {F: 2, T: 7995000}, {F: 2, T: 8000000}}, - Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "production"), + Floats: []promql.FPoint{{F: 2, T: 7985000}, {F: 2, T: 7990000}, {F: 2, T: 7995000}, {F: 2, T: 8000000}}, + Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "production"), + DropName: true, }, }, nil, @@ -3470,11 +3475,11 @@ func TestNativeHistogram_MulDivOperator(t *testing.T) { // histogram * scalar. queryString := fmt.Sprintf(`%s * %f`, seriesName, c.scalar) - queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels(), DropName: true}}) // scalar * histogram. queryString = fmt.Sprintf(`%f * %s`, c.scalar, seriesName) - queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels(), DropName: true}}) // histogram * float. queryString = fmt.Sprintf(`%s * %s`, seriesName, floatSeriesName) @@ -3486,7 +3491,7 @@ func TestNativeHistogram_MulDivOperator(t *testing.T) { // histogram / scalar. queryString = fmt.Sprintf(`%s / %f`, seriesName, c.scalar) - queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels(), DropName: true}}) // histogram / float. queryString = fmt.Sprintf(`%s / %s`, seriesName, floatSeriesName) diff --git a/promql/functions.go b/promql/functions.go index 61a2dd0765..9fa7fbe190 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -483,9 +483,13 @@ func funcClamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper return enh.Out, nil } for _, el := range vec { + if !enh.enableDelayedNameRemoval { + el.Metric = el.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: el.Metric.DropMetricName(), - F: math.Max(minVal, math.Min(maxVal, el.F)), + Metric: el.Metric, + F: math.Max(minVal, math.Min(maxVal, el.F)), + DropName: true, }) } return enh.Out, nil @@ -496,9 +500,13 @@ func funcClampMax(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel vec := vals[0].(Vector) maxVal := vals[1].(Vector)[0].F for _, el := range vec { + if !enh.enableDelayedNameRemoval { + el.Metric = el.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: el.Metric.DropMetricName(), - F: math.Min(maxVal, el.F), + Metric: el.Metric, + F: math.Min(maxVal, el.F), + DropName: true, }) } return enh.Out, nil @@ -509,9 +517,13 @@ func funcClampMin(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel vec := vals[0].(Vector) minVal := vals[1].(Vector)[0].F for _, el := range vec { + if !enh.enableDelayedNameRemoval { + el.Metric = el.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: el.Metric.DropMetricName(), - F: math.Max(minVal, el.F), + Metric: el.Metric, + F: math.Max(minVal, el.F), + DropName: true, }) } return enh.Out, nil @@ -532,8 +544,9 @@ func funcRound(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper for _, el := range vec { f := math.Floor(el.F*toNearestInverse+0.5) / toNearestInverse enh.Out = append(enh.Out, Sample{ - Metric: el.Metric.DropMetricName(), - F: f, + Metric: el.Metric, + F: f, + DropName: true, }) } return enh.Out, nil @@ -882,9 +895,13 @@ func funcPresentOverTime(vals []parser.Value, args parser.Expressions, enh *Eval func simpleFunc(vals []parser.Value, enh *EvalNodeHelper, f func(float64) float64) Vector { for _, el := range vals[0].(Vector) { if el.H == nil { // Process only float samples. + if !enh.enableDelayedNameRemoval { + el.Metric = el.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: el.Metric.DropMetricName(), - F: f(el.F), + Metric: el.Metric, + F: f(el.F), + DropName: true, }) } } @@ -1028,9 +1045,13 @@ func funcSgn(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) func funcTimestamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { vec := vals[0].(Vector) for _, el := range vec { + if !enh.enableDelayedNameRemoval { + el.Metric = el.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: el.Metric.DropMetricName(), - F: float64(el.T) / 1000, + Metric: el.Metric, + F: float64(el.T) / 1000, + DropName: true, }) } return enh.Out, nil @@ -1137,9 +1158,13 @@ func funcHistogramCount(vals []parser.Value, args parser.Expressions, enh *EvalN if sample.H == nil { continue } + if !enh.enableDelayedNameRemoval { + sample.Metric = sample.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: sample.Metric.DropMetricName(), - F: sample.H.Count, + Metric: sample.Metric, + F: sample.H.Count, + DropName: true, }) } return enh.Out, nil @@ -1154,9 +1179,13 @@ func funcHistogramSum(vals []parser.Value, args parser.Expressions, enh *EvalNod if sample.H == nil { continue } + if !enh.enableDelayedNameRemoval { + sample.Metric = sample.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: sample.Metric.DropMetricName(), - F: sample.H.Sum, + Metric: sample.Metric, + F: sample.H.Sum, + DropName: true, }) } return enh.Out, nil @@ -1171,9 +1200,13 @@ func funcHistogramAvg(vals []parser.Value, args parser.Expressions, enh *EvalNod if sample.H == nil { continue } + if !enh.enableDelayedNameRemoval { + sample.Metric = sample.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: sample.Metric.DropMetricName(), - F: sample.H.Sum / sample.H.Count, + Metric: sample.Metric, + F: sample.H.Sum / sample.H.Count, + DropName: true, }) } return enh.Out, nil @@ -1210,9 +1243,13 @@ func funcHistogramStdDev(vals []parser.Value, args parser.Expressions, enh *Eval } variance += cVariance variance /= sample.H.Count + if !enh.enableDelayedNameRemoval { + sample.Metric = sample.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: sample.Metric.DropMetricName(), - F: math.Sqrt(variance), + Metric: sample.Metric, + F: math.Sqrt(variance), + DropName: true, }) } return enh.Out, nil @@ -1249,9 +1286,13 @@ func funcHistogramStdVar(vals []parser.Value, args parser.Expressions, enh *Eval } variance += cVariance variance /= sample.H.Count + if !enh.enableDelayedNameRemoval { + sample.Metric = sample.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: sample.Metric.DropMetricName(), - F: variance, + Metric: sample.Metric, + F: variance, + DropName: true, }) } return enh.Out, nil @@ -1268,9 +1309,13 @@ func funcHistogramFraction(vals []parser.Value, args parser.Expressions, enh *Ev if sample.H == nil { continue } + if !enh.enableDelayedNameRemoval { + sample.Metric = sample.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: sample.Metric.DropMetricName(), - F: histogramFraction(lower, upper, sample.H), + Metric: sample.Metric, + F: histogramFraction(lower, upper, sample.H), + DropName: true, }) } return enh.Out, nil @@ -1338,9 +1383,13 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev continue } + if !enh.enableDelayedNameRemoval { + sample.Metric = sample.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: sample.Metric.DropMetricName(), - F: histogramQuantile(q, sample.H), + Metric: sample.Metric, + F: histogramQuantile(q, sample.H), + DropName: true, }) } @@ -1442,6 +1491,11 @@ func (ev *evaluator) evalLabelReplace(args parser.Expressions) (parser.Value, an lb.Reset(el.Metric) lb.Set(dst, string(res)) matrix[i].Metric = lb.Labels() + if dst == model.MetricNameLabel { + matrix[i].DropName = false + } else { + matrix[i].DropName = el.DropName + } } } if matrix.ContainsSameLabelset() { @@ -1496,6 +1550,12 @@ func (ev *evaluator) evalLabelJoin(args parser.Expressions) (parser.Value, annot lb.Reset(el.Metric) lb.Set(dst, strval) matrix[i].Metric = lb.Labels() + + if dst == model.MetricNameLabel { + matrix[i].DropName = false + } else { + matrix[i].DropName = el.DropName + } } return matrix, ws @@ -1518,9 +1578,13 @@ func dateWrapper(vals []parser.Value, enh *EvalNodeHelper, f func(time.Time) flo for _, el := range vals[0].(Vector) { t := time.Unix(int64(el.F), 0).UTC() + if !enh.enableDelayedNameRemoval { + el.Metric = el.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ - Metric: el.Metric.DropMetricName(), - F: f(t), + Metric: el.Metric, + F: f(t), + DropName: true, }) } return enh.Out diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 4d12dead9b..f43d7a5741 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -90,6 +90,7 @@ func NewTestEngine(enablePerStepStats bool, lookbackDelta time.Duration, maxSamp EnableNegativeOffset: true, EnablePerStepStats: enablePerStepStats, LookbackDelta: lookbackDelta, + EnableDelayedNameRemoval: true, }) } @@ -1362,6 +1363,7 @@ func (ll *LazyLoader) clear() error { NoStepSubqueryIntervalFn: func(int64) int64 { return durationMilliseconds(ll.SubqueryInterval) }, EnableAtModifier: ll.opts.EnableAtModifier, EnableNegativeOffset: ll.opts.EnableNegativeOffset, + EnableDelayedNameRemoval: true, } ll.queryEngine = promql.NewEngine(opts) diff --git a/promql/promqltest/testdata/name_label_dropping.test b/promql/promqltest/testdata/name_label_dropping.test new file mode 100644 index 0000000000..1f1dac3602 --- /dev/null +++ b/promql/promqltest/testdata/name_label_dropping.test @@ -0,0 +1,84 @@ +# Test for __name__ label drop. +load 5m + metric{env="1"} 0 60 120 + another_metric{env="1"} 60 120 180 + +# Does not drop __name__ for vector selector +eval instant at 15m metric{env="1"} + metric{env="1"} 120 + +# Drops __name__ for unary operators +eval instant at 15m -metric + {env="1"} -120 + +# Drops __name__ for binary operators +eval instant at 15m metric + another_metric + {env="1"} 300 + +# Does not drop __name__ for binary comparison operators +eval instant at 15m metric <= another_metric + metric{env="1"} 120 + +# Drops __name__ for binary comparison operators with "bool" modifier +eval instant at 15m metric <= bool another_metric + {env="1"} 1 + +# Drops __name__ for vector-scalar operations +eval instant at 15m metric * 2 + {env="1"} 240 + +# Drops __name__ for instant-vector functions +eval instant at 15m clamp(metric, 0, 100) + {env="1"} 100 + +# Drops __name__ for range-vector functions +eval instant at 15m rate(metric{env="1"}[10m]) + {env="1"} 0.2 + +# Does not drop __name__ for last_over_time function +eval instant at 15m last_over_time(metric{env="1"}[10m]) + metric{env="1"} 120 + +# Drops name for other _over_time functions +eval instant at 15m max_over_time(metric{env="1"}[10m]) + {env="1"} 120 + +# Allows relabeling (to-be-dropped) __name__ via label_replace +eval instant at 15m label_replace(rate({env="1"}[10m]), "my_name", "rate_$1", "__name__", "(.+)") + {my_name="rate_metric", env="1"} 0.2 + {my_name="rate_another_metric", env="1"} 0.2 + +# Allows preserving __name__ via label_replace +eval instant at 15m label_replace(rate({env="1"}[10m]), "__name__", "rate_$1", "__name__", "(.+)") + rate_metric{env="1"} 0.2 + rate_another_metric{env="1"} 0.2 + +# Allows relabeling (to-be-dropped) __name__ via label_join +eval instant at 15m label_join(rate({env="1"}[10m]), "my_name", "_", "__name__") + {my_name="metric", env="1"} 0.2 + {my_name="another_metric", env="1"} 0.2 + +# Allows preserving __name__ via label_join +eval instant at 15m label_join(rate({env="1"}[10m]), "__name__", "_", "__name__", "env") + metric_1{env="1"} 0.2 + another_metric_1{env="1"} 0.2 + +# Does not drop metric names fro aggregation operators +eval instant at 15m sum by (__name__, env) (metric{env="1"}) + metric{env="1"} 120 + +# Aggregation operators by __name__ lead to duplicate labelset errors (aggregation is partitioned by not yet removed __name__ label) +# This is an accidental side effect of delayed __name__ label dropping +eval_fail instant at 15m sum by (__name__) (rate({env="1"}[10m])) + +# Aggregation operators aggregate metrics with same labelset and to-be-dropped names +# This is an accidental side effect of delayed __name__ label dropping +eval instant at 15m sum(rate({env="1"}[10m])) by (env) + {env="1"} 0.4 + +# Aggregationk operators propagate __name__ label dropping information +eval instant at 15m topk(10, sum by (__name__, env) (metric{env="1"})) + metric{env="1"} 120 + +eval instant at 15m topk(10, sum by (__name__, env) (rate(metric{env="1"}[10m]))) + {env="1"} 0.2 diff --git a/promql/value.go b/promql/value.go index f129137d80..f25dbcd780 100644 --- a/promql/value.go +++ b/promql/value.go @@ -68,6 +68,9 @@ type Series struct { Metric labels.Labels `json:"metric"` Floats []FPoint `json:"values,omitempty"` Histograms []HPoint `json:"histograms,omitempty"` + // DropName is used to indicate whether the __name__ label should be dropped + // as part of the query evaluation. + DropName bool `json:"-"` } func (s Series) String() string { @@ -194,6 +197,9 @@ type Sample struct { H *histogram.FloatHistogram Metric labels.Labels + // DropName is used to indicate whether the __name__ label should be dropped + // as part of the query evaluation. + DropName bool } func (s Sample) String() string { From 8c7bf39d9671f717df8286d94869485a562f1ac9 Mon Sep 17 00:00:00 2001 From: Neeraj Gartia <80708727+NeerajGartia21@users.noreply.github.com> Date: Thu, 29 Aug 2024 20:12:35 +0530 Subject: [PATCH 53/83] Moves TestNativeHistogram_MulDivOperator to promql testing framework (#14688) PromQL: add test for mul and div operator Also, remove the converted test from the engine_test.go file. This also includes an extension of the test framework to allow NaN/Inf in histogram buckets. --------- Signed-off-by: Neeraj Gartia --- promql/engine_test.go | 166 ------------------ promql/parser/lex.go | 10 ++ promql/parser/lex_test.go | 23 +++ .../testdata/native_histograms.test | 46 +++++ 4 files changed, 79 insertions(+), 166 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index 3e159782a3..923d1264d6 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -17,7 +17,6 @@ import ( "context" "errors" "fmt" - "math" "os" "sort" "strconv" @@ -3337,171 +3336,6 @@ func TestNativeHistogram_SubOperator(t *testing.T) { } } -func TestNativeHistogram_MulDivOperator(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - originalHistogram := histogram.Histogram{ - Schema: 0, - Count: 21, - Sum: 33, - ZeroThreshold: 0.001, - ZeroCount: 3, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 3}, - }, - PositiveBuckets: []int64{3, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 3}, - }, - NegativeBuckets: []int64{3, 0, 0}, - } - - cases := []struct { - scalar float64 - histogram histogram.Histogram - expectedMul histogram.FloatHistogram - expectedDiv histogram.FloatHistogram - }{ - { - scalar: 3, - histogram: originalHistogram, - expectedMul: histogram.FloatHistogram{ - Schema: 0, - Count: 63, - Sum: 99, - ZeroThreshold: 0.001, - ZeroCount: 9, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 3}, - }, - PositiveBuckets: []float64{9, 9, 9}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 3}, - }, - NegativeBuckets: []float64{9, 9, 9}, - }, - expectedDiv: histogram.FloatHistogram{ - Schema: 0, - Count: 7, - Sum: 11, - ZeroThreshold: 0.001, - ZeroCount: 1, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 3}, - }, - PositiveBuckets: []float64{1, 1, 1}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 3}, - }, - NegativeBuckets: []float64{1, 1, 1}, - }, - }, - { - scalar: 0, - histogram: originalHistogram, - expectedMul: histogram.FloatHistogram{ - Schema: 0, - Count: 0, - Sum: 0, - ZeroThreshold: 0.001, - ZeroCount: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 3}, - }, - PositiveBuckets: []float64{0, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 3}, - }, - NegativeBuckets: []float64{0, 0, 0}, - }, - expectedDiv: histogram.FloatHistogram{ - Schema: 0, - Count: math.Inf(1), - Sum: math.Inf(1), - ZeroThreshold: 0.001, - ZeroCount: math.Inf(1), - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 3}, - }, - PositiveBuckets: []float64{math.Inf(1), math.Inf(1), math.Inf(1)}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 3}, - }, - NegativeBuckets: []float64{math.Inf(1), math.Inf(1), math.Inf(1)}, - }, - }, - } - - idx0 := int64(0) - for _, c := range cases { - for _, floatHisto := range []bool{true, false} { - t.Run(fmt.Sprintf("floatHistogram=%t %d", floatHisto, idx0), func(t *testing.T) { - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - floatSeriesName := "float_series" - - engine := newTestEngine() - - ts := idx0 * int64(10*time.Minute/time.Millisecond) - app := storage.Appender(context.Background()) - h := c.histogram - lbls := labels.FromStrings("__name__", seriesName) - // Since we mutate h later, we need to create a copy here. - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, h.Copy().ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, h.Copy(), nil) - } - require.NoError(t, err) - _, err = app.Append(0, labels.FromStrings("__name__", floatSeriesName), ts, c.scalar) - require.NoError(t, err) - require.NoError(t, app.Commit()) - - queryAndCheck := func(queryString string, exp promql.Vector) { - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - testutil.RequireEqual(t, exp, vector) - } - - // histogram * scalar. - queryString := fmt.Sprintf(`%s * %f`, seriesName, c.scalar) - queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels(), DropName: true}}) - - // scalar * histogram. - queryString = fmt.Sprintf(`%f * %s`, c.scalar, seriesName) - queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels(), DropName: true}}) - - // histogram * float. - queryString = fmt.Sprintf(`%s * %s`, seriesName, floatSeriesName) - queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) - - // float * histogram. - queryString = fmt.Sprintf(`%s * %s`, floatSeriesName, seriesName) - queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) - - // histogram / scalar. - queryString = fmt.Sprintf(`%s / %f`, seriesName, c.scalar) - queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels(), DropName: true}}) - - // histogram / float. - queryString = fmt.Sprintf(`%s / %s`, seriesName, floatSeriesName) - queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels()}}) - }) - idx0++ - } - } -} - func TestQueryLookbackDelta(t *testing.T) { var ( load = `load 5m diff --git a/promql/parser/lex.go b/promql/parser/lex.go index 0cefa30c8f..d031e83307 100644 --- a/promql/parser/lex.go +++ b/promql/parser/lex.go @@ -617,6 +617,16 @@ func lexBuckets(l *Lexer) stateFn { l.bracketOpen = false l.emit(RIGHT_BRACKET) return lexHistogram + case isAlpha(r): + // Current word is Inf or NaN. + word := l.input[l.start:l.pos] + if desc, ok := key[strings.ToLower(word)]; ok { + if desc == NUMBER { + l.emit(desc) + return lexStatements + } + } + return lexBuckets default: return l.errorf("invalid character in buckets description: %q", r) } diff --git a/promql/parser/lex_test.go b/promql/parser/lex_test.go index ac9aa27625..c5475a8b94 100644 --- a/promql/parser/lex_test.go +++ b/promql/parser/lex_test.go @@ -639,6 +639,29 @@ var tests = []struct { }, seriesDesc: true, }, + { + input: `{} {{buckets: [Inf NaN] schema:1}}`, + expected: []Item{ + {LEFT_BRACE, 0, `{`}, + {RIGHT_BRACE, 1, `}`}, + {SPACE, 2, ` `}, + {OPEN_HIST, 3, `{{`}, + {BUCKETS_DESC, 5, `buckets`}, + {COLON, 12, `:`}, + {SPACE, 13, ` `}, + {LEFT_BRACKET, 14, `[`}, + {NUMBER, 15, `Inf`}, + {SPACE, 18, ` `}, + {NUMBER, 19, `NaN`}, + {RIGHT_BRACKET, 22, `]`}, + {SPACE, 23, ` `}, + {SCHEMA_DESC, 24, `schema`}, + {COLON, 30, `:`}, + {NUMBER, 31, `1`}, + {CLOSE_HIST, 32, `}}`}, + }, + seriesDesc: true, + }, { // Series with sum as -Inf and count as NaN. input: `{} {{buckets: [5 10 7] sum:Inf count:NaN}}`, expected: []Item{ diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 110fd69e7e..71e102dcee 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -718,6 +718,52 @@ eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_4) eval instant at 10m histogram_sum(scalar(histogram_fraction(-Inf, +Inf, sum(histogram_fraction_4))) * histogram_fraction_4) {} 100 +# Apply multiplication and division operator to histogram. +load 10m + histogram_mul_div {{schema:0 count:21 sum:33 z_bucket:3 z_bucket_w:0.001 buckets:[3 3 3] n_buckets:[3 3 3]}}x1 + float_series_3 3+0x1 + float_series_0 0+0x1 + +eval instant at 10m histogram_mul_div*3 + {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + +eval instant at 10m 3*histogram_mul_div + {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + +eval instant at 10m histogram_mul_div*float_series_3 + {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + +eval instant at 10m float_series_3*histogram_mul_div + {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + +eval instant at 10m histogram_mul_div/3 + {} {{schema:0 count:7 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[1 1 1]}} + +eval instant at 10m histogram_mul_div/float_series_3 + {} {{schema:0 count:7 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[1 1 1]}} + +eval instant at 10m histogram_mul_div*0 + {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}} + +eval instant at 10m 0*histogram_mul_div + {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}} + +eval instant at 10m histogram_mul_div*float_series_0 + {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}} + +eval instant at 10m float_series_0*histogram_mul_div + {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}} + +# TODO: (NeerajGartia21) remove all the histogram buckets in case of division with zero. See: https://github.com/prometheus/prometheus/issues/13934 +eval instant at 10m histogram_mul_div/0 + {} {{schema:0 count:Inf sum:Inf z_bucket:Inf z_bucket_w:0.001 buckets:[Inf Inf Inf] n_buckets:[Inf Inf Inf]}} + +eval instant at 10m histogram_mul_div/float_series_0 + {} {{schema:0 count:Inf sum:Inf z_bucket:Inf z_bucket_w:0.001 buckets:[Inf Inf Inf] n_buckets:[Inf Inf Inf]}} + +eval instant at 10m histogram_mul_div*0/0 + {} {{schema:0 count:NaN sum:NaN z_bucket:NaN z_bucket_w:0.001 buckets:[NaN NaN NaN] n_buckets:[NaN NaN NaN]}} + clear # Counter reset only noticeable in a single bucket. From b0677263c3d40efcb86ff21afe9b29d860660a45 Mon Sep 17 00:00:00 2001 From: Kevin Rawal <84058124+kevinrawal@users.noreply.github.com> Date: Thu, 29 Aug 2024 23:06:31 +0530 Subject: [PATCH 54/83] Update basics.md fix broken URL for feature flags native histograms in querying prometheus documentation Signed-off-by: Kevin Rawal <84058124+kevinrawal@users.noreply.github.com> --- docs/querying/basics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/querying/basics.md b/docs/querying/basics.md index 304c9f07d4..81ffb4e0f3 100644 --- a/docs/querying/basics.md +++ b/docs/querying/basics.md @@ -41,7 +41,7 @@ vector is the only type which can be graphed. _Notes about the experimental native histograms:_ * Ingesting native histograms has to be enabled via a [feature - flag](../../feature_flags.md#native-histograms). + flag](../feature_flags.md#native-histograms). * Once native histograms have been ingested into the TSDB (and even after disabling the feature flag again), both instant vectors and range vectors may now contain samples that aren't simple floating point numbers (float samples) From a77f5007f9debcc1fc5b7133ac422e2b164a8c99 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Fri, 30 Aug 2024 00:14:20 -0700 Subject: [PATCH 55/83] fix bug with metadata for rw2 (#14766) Signed-off-by: Callum Styan --- tsdb/wlog/watcher.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsdb/wlog/watcher.go b/tsdb/wlog/watcher.go index 86e6f9c81d..ac5041e87b 100644 --- a/tsdb/wlog/watcher.go +++ b/tsdb/wlog/watcher.go @@ -603,7 +603,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { } case record.Metadata: - if !w.sendMetadata || !tail { + if !w.sendMetadata { break } meta, err := dec.Metadata(rec, metadata[:0]) From d550c4a0b97ff1ad7cd427f562d7eadde894d417 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 30 Aug 2024 10:13:00 +0100 Subject: [PATCH 56/83] [COMMENT] Improve comment on almost.Equal; add tests At first it wasn't obvious to me why the first check would give a different result to the second. Signed-off-by: Bryan Boreham --- util/almost/almost.go | 6 ++--- util/almost/almost_test.go | 50 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 util/almost/almost_test.go diff --git a/util/almost/almost.go b/util/almost/almost.go index a404626586..5f866b89b3 100644 --- a/util/almost/almost.go +++ b/util/almost/almost.go @@ -22,10 +22,10 @@ import ( var minNormal = math.Float64frombits(0x0010000000000000) // The smallest positive normal value of type float64. // Equal returns true if a and b differ by less than their sum -// multiplied by epsilon. +// multiplied by epsilon, or if both are StaleNaN, or if both are any other NaN. func Equal(a, b, epsilon float64) bool { - // StaleNaN is a special value that is used as staleness maker, so - // the two values are equal when both are exactly equals to stale NaN. + // StaleNaN is a special value that is used as staleness maker, and + // we don't want it to compare equal to any other NaN. if value.IsStaleNaN(a) || value.IsStaleNaN(b) { return value.IsStaleNaN(a) && value.IsStaleNaN(b) } diff --git a/util/almost/almost_test.go b/util/almost/almost_test.go new file mode 100644 index 0000000000..fba37f13f6 --- /dev/null +++ b/util/almost/almost_test.go @@ -0,0 +1,50 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package almost + +import ( + "fmt" + "math" + "testing" + + "github.com/prometheus/prometheus/model/value" +) + +func TestEqual(t *testing.T) { + staleNaN := math.Float64frombits(value.StaleNaN) + tests := []struct { + a float64 + b float64 + epsilon float64 + want bool + }{ + {0.0, 0.0, 0.0, true}, + {0.0, 0.1, 0.0, false}, + {1.0, 1.1, 0.1, true}, + {-1.0, -1.1, 0.1, true}, + {math.MaxFloat64, math.MaxFloat64 / 10, 0.1, false}, + {1.0, math.NaN(), 0.1, false}, + {math.NaN(), math.NaN(), 0.1, true}, + {math.NaN(), staleNaN, 0.1, false}, + {staleNaN, math.NaN(), 0.1, false}, + {staleNaN, staleNaN, 0.1, true}, + } + for _, tt := range tests { + t.Run(fmt.Sprintf("%v,%v,%v", tt.a, tt.b, tt.epsilon), func(t *testing.T) { + if got := Equal(tt.a, tt.b, tt.epsilon); got != tt.want { + t.Errorf("Equal(%v,%v,%v) = %v, want %v", tt.a, tt.b, tt.epsilon, got, tt.want) + } + }) + } +} From bc6c2c5d3502926852f33df0b0a8da1cb3c7bd00 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Fri, 30 Aug 2024 11:30:57 +0200 Subject: [PATCH 57/83] OTLP Receiver: Add tests (#14764) Signed-off-by: Arve Knudsen --- .../prometheusremotewrite/helper_test.go | 225 +++++ .../prometheusremotewrite/histograms_test.go | 771 ++++++++++++++++++ .../number_data_points_test.go | 258 ++++++ .../prometheusremotewrite/testutil_test.go | 55 ++ 4 files changed, 1309 insertions(+) create mode 100644 storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go create mode 100644 storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go create mode 100644 storage/remote/otlptranslator/prometheusremotewrite/testutil_test.go diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go index 729d99b8a8..e02ebbf5de 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go @@ -10,6 +10,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/debbf30360b8d3a0ded8db09c4419d2a9c99b94a/pkg/translator/prometheusremotewrite/helper_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + package prometheusremotewrite import ( @@ -18,6 +22,9 @@ import ( "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/common/model" "github.com/prometheus/prometheus/prompb" ) @@ -178,3 +185,221 @@ func Test_convertTimeStamp(t *testing.T) { }) } } + +func TestPrometheusConverter_AddSummaryDataPoints(t *testing.T) { + ts := pcommon.Timestamp(time.Now().UnixNano()) + tests := []struct { + name string + metric func() pmetric.Metric + want func() map[uint64]*prompb.TimeSeries + }{ + { + name: "summary with start time", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_summary") + metric.SetEmptySummary() + + dp := metric.Summary().DataPoints().AppendEmpty() + dp.SetTimestamp(ts) + dp.SetStartTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_summary" + countStr}, + } + createdLabels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_summary" + createdSuffix}, + } + sumLabels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_summary" + sumStr}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(sumLabels): { + Labels: sumLabels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(createdLabels): { + Labels: createdLabels, + Samples: []prompb.Sample{ + {Value: float64(convertTimeStamp(ts)), Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + { + name: "summary without start time", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_summary") + metric.SetEmptySummary() + + dp := metric.Summary().DataPoints().AppendEmpty() + dp.SetTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_summary" + countStr}, + } + sumLabels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_summary" + sumStr}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(sumLabels): { + Labels: sumLabels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := tt.metric() + converter := NewPrometheusConverter() + + converter.addSummaryDataPoints( + metric.Summary().DataPoints(), + pcommon.NewResource(), + Settings{ + ExportCreatedMetric: true, + }, + metric.Name(), + ) + + assert.Equal(t, tt.want(), converter.unique) + assert.Empty(t, converter.conflicts) + }) + } +} + +func TestPrometheusConverter_AddHistogramDataPoints(t *testing.T) { + ts := pcommon.Timestamp(time.Now().UnixNano()) + tests := []struct { + name string + metric func() pmetric.Metric + want func() map[uint64]*prompb.TimeSeries + }{ + { + name: "histogram with start time", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_hist") + metric.SetEmptyHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + + pt := metric.Histogram().DataPoints().AppendEmpty() + pt.SetTimestamp(ts) + pt.SetStartTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_hist" + countStr}, + } + createdLabels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_hist" + createdSuffix}, + } + infLabels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_hist_bucket"}, + {Name: model.BucketLabel, Value: "+Inf"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(infLabels): { + Labels: infLabels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(createdLabels): { + Labels: createdLabels, + Samples: []prompb.Sample{ + {Value: float64(convertTimeStamp(ts)), Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + { + name: "histogram without start time", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_hist") + metric.SetEmptyHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + + pt := metric.Histogram().DataPoints().AppendEmpty() + pt.SetTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_hist" + countStr}, + } + infLabels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_hist_bucket"}, + {Name: model.BucketLabel, Value: "+Inf"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(infLabels): { + Labels: infLabels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := tt.metric() + converter := NewPrometheusConverter() + + converter.addHistogramDataPoints( + metric.Histogram().DataPoints(), + pcommon.NewResource(), + Settings{ + ExportCreatedMetric: true, + }, + metric.Name(), + ) + + assert.Equal(t, tt.want(), converter.unique) + assert.Empty(t, converter.conflicts) + }) + } +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go new file mode 100644 index 0000000000..cd1c858ac1 --- /dev/null +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go @@ -0,0 +1,771 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/247a9f996e09a83cdc25addf70c05e42b8b30186/pkg/translator/prometheusremotewrite/histograms_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package prometheusremotewrite + +import ( + "fmt" + "testing" + "time" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/prompb" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + + prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" +) + +type expectedBucketLayout struct { + wantSpans []prompb.BucketSpan + wantDeltas []int64 +} + +func TestConvertBucketsLayout(t *testing.T) { + tests := []struct { + name string + buckets func() pmetric.ExponentialHistogramDataPointBuckets + wantLayout map[int32]expectedBucketLayout + }{ + { + name: "zero offset", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(0) + b.BucketCounts().FromRaw([]uint64{4, 3, 2, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 1, + Length: 4, + }, + }, + wantDeltas: []int64{4, -1, -1, -1}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 1, + Length: 2, + }, + }, + // 4+3, 2+1 = 7, 3 =delta= 7, -4 + wantDeltas: []int64{7, -4}, + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 1, + Length: 1, + }, + }, + // 4+3+2+1 = 10 =delta= 10 + wantDeltas: []int64{10}, + }, + }, + }, + { + name: "offset 1", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(1) + b.BucketCounts().FromRaw([]uint64{4, 3, 2, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 2, + Length: 4, + }, + }, + wantDeltas: []int64{4, -1, -1, -1}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 1, + Length: 3, + }, + }, + wantDeltas: []int64{4, 1, -4}, // 0+4, 3+2, 1+0 = 4, 5, 1 + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 1, + Length: 2, + }, + }, + wantDeltas: []int64{9, -8}, // 0+4+3+2, 1+0+0+0 = 9, 1 + }, + }, + }, + { + name: "positive offset", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(4) + b.BucketCounts().FromRaw([]uint64{4, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 5, + Length: 4, + }, + { + Offset: 12, + Length: 1, + }, + }, + wantDeltas: []int64{4, -2, -2, 2, -1}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 3, + Length: 2, + }, + { + Offset: 6, + Length: 1, + }, + }, + // Downscale: + // 4+2, 0+2, 0+0, 0+0, 0+0, 0+0, 0+0, 0+0, 1+0 = 6, 2, 0, 0, 0, 0, 0, 0, 1 + wantDeltas: []int64{6, -4, -1}, + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 2, + Length: 1, + }, + { + Offset: 3, + Length: 1, + }, + }, + // Downscale: + // 4+2+0+2, 0+0+0+0, 0+0+0+0, 0+0+0+0, 1+0+0+0 = 8, 0, 0, 0, 1 + // Check from sclaing from previous: 6+2, 0+0, 0+0, 0+0, 1+0 = 8, 0, 0, 0, 1 + wantDeltas: []int64{8, -7}, + }, + }, + }, + { + name: "scaledown merges spans", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(4) + b.BucketCounts().FromRaw([]uint64{4, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 5, + Length: 4, + }, + { + Offset: 8, + Length: 1, + }, + }, + wantDeltas: []int64{4, -2, -2, 2, -1}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 3, + Length: 2, + }, + { + Offset: 4, + Length: 1, + }, + }, + // Downscale: + // 4+2, 0+2, 0+0, 0+0, 0+0, 0+0, 1+0 = 6, 2, 0, 0, 0, 0, 1 + wantDeltas: []int64{6, -4, -1}, + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 2, + Length: 4, + }, + }, + // Downscale: + // 4+2+0+2, 0+0+0+0, 0+0+0+0, 1+0+0+0 = 8, 0, 0, 1 + // Check from sclaing from previous: 6+2, 0+0, 0+0, 1+0 = 8, 0, 0, 1 + wantDeltas: []int64{8, -8, 0, 1}, + }, + }, + }, + { + name: "negative offset", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(-2) + b.BucketCounts().FromRaw([]uint64{3, 1, 0, 0, 0, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: -1, + Length: 2, + }, + { + Offset: 3, + Length: 1, + }, + }, + wantDeltas: []int64{3, -2, 0}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 0, + Length: 3, + }, + }, + // Downscale: + // 3+1, 0+0, 0+1 = 4, 0, 1 + wantDeltas: []int64{4, -4, 1}, + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 0, + Length: 2, + }, + }, + // Downscale: + // 0+0+3+1, 0+0+0+0 = 4, 1 + wantDeltas: []int64{4, -3}, + }, + }, + }, + { + name: "buckets with gaps of size 1", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(-2) + b.BucketCounts().FromRaw([]uint64{3, 1, 0, 1, 0, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: -1, + Length: 6, + }, + }, + wantDeltas: []int64{3, -2, -1, 1, -1, 1}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 0, + Length: 3, + }, + }, + // Downscale: + // 3+1, 0+1, 0+1 = 4, 1, 1 + wantDeltas: []int64{4, -3, 0}, + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 0, + Length: 2, + }, + }, + // Downscale: + // 0+0+3+1, 0+1+0+1 = 4, 2 + wantDeltas: []int64{4, -2}, + }, + }, + }, + { + name: "buckets with gaps of size 2", + buckets: func() pmetric.ExponentialHistogramDataPointBuckets { + b := pmetric.NewExponentialHistogramDataPointBuckets() + b.SetOffset(-2) + b.BucketCounts().FromRaw([]uint64{3, 0, 0, 1, 0, 0, 1}) + return b + }, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: []prompb.BucketSpan{ + { + Offset: -1, + Length: 7, + }, + }, + wantDeltas: []int64{3, -3, 0, 1, -1, 0, 1}, + }, + 1: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 0, + Length: 4, + }, + }, + // Downscale: + // 3+0, 0+1, 0+0, 0+1 = 3, 1, 0, 1 + wantDeltas: []int64{3, -2, -1, 1}, + }, + 2: { + wantSpans: []prompb.BucketSpan{ + { + Offset: 0, + Length: 3, + }, + }, + // Downscale: + // 0+0+3+0, 0+1+0+0, 1+0+0+0 = 3, 1, 1 + wantDeltas: []int64{3, -2, 0}, + }, + }, + }, + { + name: "zero buckets", + buckets: pmetric.NewExponentialHistogramDataPointBuckets, + wantLayout: map[int32]expectedBucketLayout{ + 0: { + wantSpans: nil, + wantDeltas: nil, + }, + 1: { + wantSpans: nil, + wantDeltas: nil, + }, + 2: { + wantSpans: nil, + wantDeltas: nil, + }, + }, + }, + } + for _, tt := range tests { + for scaleDown, wantLayout := range tt.wantLayout { + t.Run(fmt.Sprintf("%s-scaleby-%d", tt.name, scaleDown), func(t *testing.T) { + gotSpans, gotDeltas := convertBucketsLayout(tt.buckets(), scaleDown) + assert.Equal(t, wantLayout.wantSpans, gotSpans) + assert.Equal(t, wantLayout.wantDeltas, gotDeltas) + }) + } + } +} + +func BenchmarkConvertBucketLayout(b *testing.B) { + scenarios := []struct { + gap int + }{ + {gap: 0}, + {gap: 1}, + {gap: 2}, + {gap: 3}, + } + + for _, scenario := range scenarios { + buckets := pmetric.NewExponentialHistogramDataPointBuckets() + buckets.SetOffset(0) + for i := 0; i < 1000; i++ { + if i%(scenario.gap+1) == 0 { + buckets.BucketCounts().Append(10) + } else { + buckets.BucketCounts().Append(0) + } + } + b.Run(fmt.Sprintf("gap %d", scenario.gap), func(b *testing.B) { + for i := 0; i < b.N; i++ { + convertBucketsLayout(buckets, 0) + } + }) + } +} + +func TestExponentialToNativeHistogram(t *testing.T) { + tests := []struct { + name string + exponentialHist func() pmetric.ExponentialHistogramDataPoint + wantNativeHist func() prompb.Histogram + wantErrMessage string + }{ + { + name: "convert exp. to native histogram", + exponentialHist: func() pmetric.ExponentialHistogramDataPoint { + pt := pmetric.NewExponentialHistogramDataPoint() + pt.SetStartTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(100))) + pt.SetTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(500))) + pt.SetCount(4) + pt.SetSum(10.1) + pt.SetScale(1) + pt.SetZeroCount(1) + + pt.Positive().BucketCounts().FromRaw([]uint64{1, 1}) + pt.Positive().SetOffset(1) + + pt.Negative().BucketCounts().FromRaw([]uint64{1, 1}) + pt.Negative().SetOffset(1) + + return pt + }, + wantNativeHist: func() prompb.Histogram { + return prompb.Histogram{ + Count: &prompb.Histogram_CountInt{CountInt: 4}, + Sum: 10.1, + Schema: 1, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 1}, + NegativeSpans: []prompb.BucketSpan{{Offset: 2, Length: 2}}, + NegativeDeltas: []int64{1, 0}, + PositiveSpans: []prompb.BucketSpan{{Offset: 2, Length: 2}}, + PositiveDeltas: []int64{1, 0}, + Timestamp: 500, + } + }, + }, + { + name: "convert exp. to native histogram with no sum", + exponentialHist: func() pmetric.ExponentialHistogramDataPoint { + pt := pmetric.NewExponentialHistogramDataPoint() + pt.SetStartTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(100))) + pt.SetTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(500))) + + pt.SetCount(4) + pt.SetScale(1) + pt.SetZeroCount(1) + + pt.Positive().BucketCounts().FromRaw([]uint64{1, 1}) + pt.Positive().SetOffset(1) + + pt.Negative().BucketCounts().FromRaw([]uint64{1, 1}) + pt.Negative().SetOffset(1) + + return pt + }, + wantNativeHist: func() prompb.Histogram { + return prompb.Histogram{ + Count: &prompb.Histogram_CountInt{CountInt: 4}, + Schema: 1, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 1}, + NegativeSpans: []prompb.BucketSpan{{Offset: 2, Length: 2}}, + NegativeDeltas: []int64{1, 0}, + PositiveSpans: []prompb.BucketSpan{{Offset: 2, Length: 2}}, + PositiveDeltas: []int64{1, 0}, + Timestamp: 500, + } + }, + }, + { + name: "invalid negative scale", + exponentialHist: func() pmetric.ExponentialHistogramDataPoint { + pt := pmetric.NewExponentialHistogramDataPoint() + pt.SetScale(-10) + return pt + }, + wantErrMessage: "cannot convert exponential to native histogram." + + " Scale must be >= -4, was -10", + }, + { + name: "no downscaling at scale 8", + exponentialHist: func() pmetric.ExponentialHistogramDataPoint { + pt := pmetric.NewExponentialHistogramDataPoint() + pt.SetTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(500))) + pt.SetCount(6) + pt.SetSum(10.1) + pt.SetScale(8) + pt.SetZeroCount(1) + + pt.Positive().BucketCounts().FromRaw([]uint64{1, 1, 1}) + pt.Positive().SetOffset(1) + + pt.Negative().BucketCounts().FromRaw([]uint64{1, 1, 1}) + pt.Negative().SetOffset(2) + return pt + }, + wantNativeHist: func() prompb.Histogram { + return prompb.Histogram{ + Count: &prompb.Histogram_CountInt{CountInt: 6}, + Sum: 10.1, + Schema: 8, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 1}, + PositiveSpans: []prompb.BucketSpan{{Offset: 2, Length: 3}}, + PositiveDeltas: []int64{1, 0, 0}, // 1, 1, 1 + NegativeSpans: []prompb.BucketSpan{{Offset: 3, Length: 3}}, + NegativeDeltas: []int64{1, 0, 0}, // 1, 1, 1 + Timestamp: 500, + } + }, + }, + { + name: "downsample if scale is more than 8", + exponentialHist: func() pmetric.ExponentialHistogramDataPoint { + pt := pmetric.NewExponentialHistogramDataPoint() + pt.SetTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(500))) + pt.SetCount(6) + pt.SetSum(10.1) + pt.SetScale(9) + pt.SetZeroCount(1) + + pt.Positive().BucketCounts().FromRaw([]uint64{1, 1, 1}) + pt.Positive().SetOffset(1) + + pt.Negative().BucketCounts().FromRaw([]uint64{1, 1, 1}) + pt.Negative().SetOffset(2) + return pt + }, + wantNativeHist: func() prompb.Histogram { + return prompb.Histogram{ + Count: &prompb.Histogram_CountInt{CountInt: 6}, + Sum: 10.1, + Schema: 8, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 1}, + PositiveSpans: []prompb.BucketSpan{{Offset: 1, Length: 2}}, + PositiveDeltas: []int64{1, 1}, // 0+1, 1+1 = 1, 2 + NegativeSpans: []prompb.BucketSpan{{Offset: 2, Length: 2}}, + NegativeDeltas: []int64{2, -1}, // 1+1, 1+0 = 2, 1 + Timestamp: 500, + } + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + validateExponentialHistogramCount(t, tt.exponentialHist()) // Sanity check. + got, annots, err := exponentialToNativeHistogram(tt.exponentialHist()) + if tt.wantErrMessage != "" { + assert.ErrorContains(t, err, tt.wantErrMessage) + return + } + + require.NoError(t, err) + require.Empty(t, annots) + assert.Equal(t, tt.wantNativeHist(), got) + validateNativeHistogramCount(t, got) + }) + } +} + +func validateExponentialHistogramCount(t *testing.T, h pmetric.ExponentialHistogramDataPoint) { + actualCount := uint64(0) + for _, bucket := range h.Positive().BucketCounts().AsRaw() { + actualCount += bucket + } + for _, bucket := range h.Negative().BucketCounts().AsRaw() { + actualCount += bucket + } + require.Equal(t, h.Count(), actualCount, "exponential histogram count mismatch") +} + +func validateNativeHistogramCount(t *testing.T, h prompb.Histogram) { + require.NotNil(t, h.Count) + require.IsType(t, &prompb.Histogram_CountInt{}, h.Count) + want := h.Count.(*prompb.Histogram_CountInt).CountInt + var ( + actualCount uint64 + prevBucket int64 + ) + for _, delta := range h.PositiveDeltas { + prevBucket += delta + actualCount += uint64(prevBucket) + } + prevBucket = 0 + for _, delta := range h.NegativeDeltas { + prevBucket += delta + actualCount += uint64(prevBucket) + } + assert.Equal(t, want, actualCount, "native histogram count mismatch") +} + +func TestPrometheusConverter_addExponentialHistogramDataPoints(t *testing.T) { + tests := []struct { + name string + metric func() pmetric.Metric + wantSeries func() map[uint64]*prompb.TimeSeries + }{ + { + name: "histogram data points with same labels", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_hist") + metric.SetEmptyExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + + pt := metric.ExponentialHistogram().DataPoints().AppendEmpty() + pt.SetCount(7) + pt.SetScale(1) + pt.Positive().SetOffset(-1) + pt.Positive().BucketCounts().FromRaw([]uint64{4, 2}) + pt.Exemplars().AppendEmpty().SetDoubleValue(1) + pt.Attributes().PutStr("attr", "test_attr") + + pt = metric.ExponentialHistogram().DataPoints().AppendEmpty() + pt.SetCount(4) + pt.SetScale(1) + pt.Positive().SetOffset(-1) + pt.Positive().BucketCounts().FromRaw([]uint64{4, 2, 1}) + pt.Exemplars().AppendEmpty().SetDoubleValue(2) + pt.Attributes().PutStr("attr", "test_attr") + + return metric + }, + wantSeries: func() map[uint64]*prompb.TimeSeries { + labels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_hist"}, + {Name: "attr", Value: "test_attr"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Histograms: []prompb.Histogram{ + { + Count: &prompb.Histogram_CountInt{CountInt: 7}, + Schema: 1, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 0}, + PositiveSpans: []prompb.BucketSpan{{Offset: 0, Length: 2}}, + PositiveDeltas: []int64{4, -2}, + }, + { + Count: &prompb.Histogram_CountInt{CountInt: 4}, + Schema: 1, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 0}, + PositiveSpans: []prompb.BucketSpan{{Offset: 0, Length: 3}}, + PositiveDeltas: []int64{4, -2, -1}, + }, + }, + Exemplars: []prompb.Exemplar{ + {Value: 1}, + {Value: 2}, + }, + }, + } + }, + }, + { + name: "histogram data points with different labels", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_hist") + metric.SetEmptyExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + + pt := metric.ExponentialHistogram().DataPoints().AppendEmpty() + pt.SetCount(7) + pt.SetScale(1) + pt.Positive().SetOffset(-1) + pt.Positive().BucketCounts().FromRaw([]uint64{4, 2}) + pt.Exemplars().AppendEmpty().SetDoubleValue(1) + pt.Attributes().PutStr("attr", "test_attr") + + pt = metric.ExponentialHistogram().DataPoints().AppendEmpty() + pt.SetCount(4) + pt.SetScale(1) + pt.Negative().SetOffset(-1) + pt.Negative().BucketCounts().FromRaw([]uint64{4, 2, 1}) + pt.Exemplars().AppendEmpty().SetDoubleValue(2) + pt.Attributes().PutStr("attr", "test_attr_two") + + return metric + }, + wantSeries: func() map[uint64]*prompb.TimeSeries { + labels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_hist"}, + {Name: "attr", Value: "test_attr"}, + } + labelsAnother := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_hist"}, + {Name: "attr", Value: "test_attr_two"}, + } + + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Histograms: []prompb.Histogram{ + { + Count: &prompb.Histogram_CountInt{CountInt: 7}, + Schema: 1, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 0}, + PositiveSpans: []prompb.BucketSpan{{Offset: 0, Length: 2}}, + PositiveDeltas: []int64{4, -2}, + }, + }, + Exemplars: []prompb.Exemplar{ + {Value: 1}, + }, + }, + timeSeriesSignature(labelsAnother): { + Labels: labelsAnother, + Histograms: []prompb.Histogram{ + { + Count: &prompb.Histogram_CountInt{CountInt: 4}, + Schema: 1, + ZeroThreshold: defaultZeroThreshold, + ZeroCount: &prompb.Histogram_ZeroCountInt{ZeroCountInt: 0}, + NegativeSpans: []prompb.BucketSpan{{Offset: 0, Length: 3}}, + NegativeDeltas: []int64{4, -2, -1}, + }, + }, + Exemplars: []prompb.Exemplar{ + {Value: 2}, + }, + }, + } + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := tt.metric() + + converter := NewPrometheusConverter() + annots, err := converter.addExponentialHistogramDataPoints( + metric.ExponentialHistogram().DataPoints(), + pcommon.NewResource(), + Settings{ + ExportCreatedMetric: true, + }, + prometheustranslator.BuildCompliantName(metric, "", true), + ) + require.NoError(t, err) + require.Empty(t, annots) + + assert.Equal(t, tt.wantSeries(), converter.unique) + assert.Empty(t, converter.conflicts) + }) + } +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go new file mode 100644 index 0000000000..41afc8c4c3 --- /dev/null +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go @@ -0,0 +1,258 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/247a9f996e09a83cdc25addf70c05e42b8b30186/pkg/translator/prometheusremotewrite/number_data_points_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package prometheusremotewrite + +import ( + "testing" + "time" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/prompb" + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" +) + +func TestPrometheusConverter_addGaugeNumberDataPoints(t *testing.T) { + ts := uint64(time.Now().UnixNano()) + tests := []struct { + name string + metric func() pmetric.Metric + want func() map[uint64]*prompb.TimeSeries + }{ + { + name: "gauge", + metric: func() pmetric.Metric { + return getIntGaugeMetric( + "test", + pcommon.NewMap(), + 1, ts, + ) + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + { + Value: 1, + Timestamp: convertTimeStamp(pcommon.Timestamp(ts)), + }}, + }, + } + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := tt.metric() + converter := NewPrometheusConverter() + + converter.addGaugeNumberDataPoints( + metric.Gauge().DataPoints(), + pcommon.NewResource(), + Settings{ + ExportCreatedMetric: true, + }, + metric.Name(), + ) + + assert.Equal(t, tt.want(), converter.unique) + assert.Empty(t, converter.conflicts) + }) + } +} + +func TestPrometheusConverter_addSumNumberDataPoints(t *testing.T) { + ts := pcommon.Timestamp(time.Now().UnixNano()) + tests := []struct { + name string + metric func() pmetric.Metric + want func() map[uint64]*prompb.TimeSeries + }{ + { + name: "sum", + metric: func() pmetric.Metric { + return getIntSumMetric( + "test", + pcommon.NewMap(), + 1, + uint64(ts.AsTime().UnixNano()), + ) + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + { + Value: 1, + Timestamp: convertTimeStamp(ts), + }}, + }, + } + }, + }, + { + name: "sum with exemplars", + metric: func() pmetric.Metric { + m := getIntSumMetric( + "test", + pcommon.NewMap(), + 1, + uint64(ts.AsTime().UnixNano()), + ) + m.Sum().DataPoints().At(0).Exemplars().AppendEmpty().SetDoubleValue(2) + return m + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{{ + Value: 1, + Timestamp: convertTimeStamp(ts), + }}, + Exemplars: []prompb.Exemplar{ + {Value: 2}, + }, + }, + } + }, + }, + { + name: "monotonic cumulative sum with start timestamp", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_sum") + metric.SetEmptySum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + metric.SetEmptySum().SetIsMonotonic(true) + + dp := metric.Sum().DataPoints().AppendEmpty() + dp.SetDoubleValue(1) + dp.SetTimestamp(ts) + dp.SetStartTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_sum"}, + } + createdLabels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_sum" + createdSuffix}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 1, Timestamp: convertTimeStamp(ts)}, + }, + }, + timeSeriesSignature(createdLabels): { + Labels: createdLabels, + Samples: []prompb.Sample{ + {Value: float64(convertTimeStamp(ts)), Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + { + name: "monotonic cumulative sum with no start time", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_sum") + metric.SetEmptySum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + metric.SetEmptySum().SetIsMonotonic(true) + + dp := metric.Sum().DataPoints().AppendEmpty() + dp.SetTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_sum"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + { + name: "non-monotonic cumulative sum with start time", + metric: func() pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName("test_sum") + metric.SetEmptySum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + metric.SetEmptySum().SetIsMonotonic(false) + + dp := metric.Sum().DataPoints().AppendEmpty() + dp.SetTimestamp(ts) + + return metric + }, + want: func() map[uint64]*prompb.TimeSeries { + labels := []prompb.Label{ + {Name: model.MetricNameLabel, Value: "test_sum"}, + } + return map[uint64]*prompb.TimeSeries{ + timeSeriesSignature(labels): { + Labels: labels, + Samples: []prompb.Sample{ + {Value: 0, Timestamp: convertTimeStamp(ts)}, + }, + }, + } + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := tt.metric() + converter := NewPrometheusConverter() + + converter.addSumNumberDataPoints( + metric.Sum().DataPoints(), + pcommon.NewResource(), + metric, + Settings{ + ExportCreatedMetric: true, + }, + metric.Name(), + ) + + assert.Equal(t, tt.want(), converter.unique) + assert.Empty(t, converter.conflicts) + }) + } +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/testutil_test.go b/storage/remote/otlptranslator/prometheusremotewrite/testutil_test.go new file mode 100644 index 0000000000..187127fcb2 --- /dev/null +++ b/storage/remote/otlptranslator/prometheusremotewrite/testutil_test.go @@ -0,0 +1,55 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/247a9f996e09a83cdc25addf70c05e42b8b30186/pkg/translator/prometheusremotewrite/testutil_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package prometheusremotewrite + +import ( + "strings" + + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" +) + +func getIntGaugeMetric(name string, attributes pcommon.Map, value int64, ts uint64) pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName(name) + dp := metric.SetEmptyGauge().DataPoints().AppendEmpty() + if strings.HasPrefix(name, "staleNaN") { + dp.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + } + dp.SetIntValue(value) + attributes.CopyTo(dp.Attributes()) + + dp.SetStartTimestamp(pcommon.Timestamp(0)) + dp.SetTimestamp(pcommon.Timestamp(ts)) + return metric +} + +func getIntSumMetric(name string, attributes pcommon.Map, value int64, ts uint64) pmetric.Metric { + metric := pmetric.NewMetric() + metric.SetName(name) + metric.SetEmptySum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + dp := metric.Sum().DataPoints().AppendEmpty() + if strings.HasPrefix(name, "staleNaN") { + dp.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true)) + } + dp.SetIntValue(value) + attributes.CopyTo(dp.Attributes()) + + dp.SetStartTimestamp(pcommon.Timestamp(0)) + dp.SetTimestamp(pcommon.Timestamp(ts)) + return metric +} From d23d196db58cbf5ee67d64932aeb14b6038d16c9 Mon Sep 17 00:00:00 2001 From: machine424 Date: Fri, 30 Aug 2024 13:37:25 +0200 Subject: [PATCH 58/83] fix(discovery): prevent the manager from storing stale targetGroups Signed-off-by: machine424 --- discovery/manager.go | 10 +++++++++- discovery/manager_test.go | 18 ++++++++---------- scrape/manager_test.go | 2 +- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/discovery/manager.go b/discovery/manager.go index b7ebdb7e0b..cefa90a866 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -394,8 +394,16 @@ func (m *Manager) updateGroup(poolKey poolKey, tgs []*targetgroup.Group) { m.targets[poolKey] = make(map[string]*targetgroup.Group) } for _, tg := range tgs { - if tg != nil { // Some Discoverers send nil target group so need to check for it to avoid panics. + // Some Discoverers send nil target group so need to check for it to avoid panics. + if tg == nil { + continue + } + if len(tg.Targets) > 0 { m.targets[poolKey][tg.Source] = tg + } else { + // The target group is empty, drop the corresponding entry to avoid leaks. + // In case the group yielded targets before, allGroups() will take care of making consumers drop them. + delete(m.targets[poolKey], tg.Source) } } } diff --git a/discovery/manager_test.go b/discovery/manager_test.go index 707c3931d7..831cefe514 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -1051,8 +1051,8 @@ func TestDiscovererConfigs(t *testing.T) { } // TestTargetSetRecreatesEmptyStaticConfigs ensures that reloading a config file after -// removing all targets from the static_configs sends an update with empty targetGroups. -// This is required to signal the receiver that this target set has no current targets. +// removing all targets from the static_configs cleans the corresponding targetGroups entries to avoid leaks and sends an empty update. +// The update is required to signal the consumers that the previous targets should be dropped. func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -1085,16 +1085,14 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { discoveryManager.ApplyConfig(c) syncedTargets = <-discoveryManager.SyncCh() + require.Len(t, discoveryManager.targets, 1) p = pk("static", "prometheus", 1) targetGroups, ok := discoveryManager.targets[p] - require.True(t, ok, "'%v' should be present in target groups", p) - group, ok := targetGroups[""] - require.True(t, ok, "missing '' key in target groups %v", targetGroups) - - require.Empty(t, group.Targets, "Invalid number of targets.") - require.Len(t, syncedTargets, 1) - require.Len(t, syncedTargets["prometheus"], 1) - require.Nil(t, syncedTargets["prometheus"][0].Labels) + require.True(t, ok, "'%v' should be present in targets", p) + // Otherwise the targetGroups will leak, see https://github.com/prometheus/prometheus/issues/12436. + require.Empty(t, targetGroups, 0, "'%v' should no longer have any associated target groups", p) + require.Len(t, syncedTargets, 1, "an update with no targetGroups should still be sent.") + require.Empty(t, syncedTargets["prometheus"], 0) } func TestIdenticalConfigurationsAreCoalesced(t *testing.T) { diff --git a/scrape/manager_test.go b/scrape/manager_test.go index f260167b5e..c71691c95d 100644 --- a/scrape/manager_test.go +++ b/scrape/manager_test.go @@ -1178,7 +1178,7 @@ scrape_configs: ) } -// TestOnlyStaleTargetsAreDropped makes sure that when a job has multiple providers, when aone of them should no, +// TestOnlyStaleTargetsAreDropped makes sure that when a job has multiple providers, when one of them should no // longer discover targets, only the stale targets of that provier are dropped. func TestOnlyStaleTargetsAreDropped(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) From a693dd19f244f000da40bcbac85041846b78cfc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Fri, 30 Aug 2024 16:44:36 +0200 Subject: [PATCH 59/83] Fix: chunkenc.MockSeriesIterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starts its index from 0 , but users call Next() before first sample so it needs to start from -1 Signed-off-by: György Krajcsovits --- storage/interface_test.go | 37 +++++++++++++++++++++++++++++++++++++ tsdb/chunkenc/chunk.go | 2 +- 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 storage/interface_test.go diff --git a/storage/interface_test.go b/storage/interface_test.go new file mode 100644 index 0000000000..ba60721736 --- /dev/null +++ b/storage/interface_test.go @@ -0,0 +1,37 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage_test + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb/chunkenc" +) + +func TestMockSeries(t *testing.T) { + s := storage.MockSeries([]int64{1, 2, 3}, []float64{1, 2, 3}, []string{"__name__", "foo"}) + it := s.Iterator(nil) + ts := []int64{} + vs := []float64{} + for it.Next() == chunkenc.ValFloat { + t, v := it.At() + ts = append(ts, t) + vs = append(vs, v) + } + require.Equal(t, []int64{1, 2, 3}, ts) + require.Equal(t, []float64{1, 2, 3}, vs) +} diff --git a/tsdb/chunkenc/chunk.go b/tsdb/chunkenc/chunk.go index 1421f3b398..7082f34c3f 100644 --- a/tsdb/chunkenc/chunk.go +++ b/tsdb/chunkenc/chunk.go @@ -213,7 +213,7 @@ func MockSeriesIterator(timestamps []int64, values []float64) Iterator { return &mockSeriesIterator{ timeStamps: timestamps, values: values, - currIndex: 0, + currIndex: -1, } } From 471fb8c9788151e12df0a94a584c6d854f40087c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 23:07:51 +0000 Subject: [PATCH 60/83] Bump github/codeql-action from 3.25.15 to 3.26.6 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.25.15 to 3.26.6. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/afb54ba388a7dca6ecae48f608c4ff05ff4cc77a...4dd16135b69a43b6c8efb853346f8437d92d3c93) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/codeql-analysis.yml | 6 +++--- .github/workflows/scorecards.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 2e1bd30246..89aa2ba29b 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -27,12 +27,12 @@ jobs: uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - name: Initialize CodeQL - uses: github/codeql-action/init@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # v3.25.15 + uses: github/codeql-action/init@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 with: languages: ${{ matrix.language }} - name: Autobuild - uses: github/codeql-action/autobuild@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # v3.25.15 + uses: github/codeql-action/autobuild@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # v3.25.15 + uses: github/codeql-action/analyze@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 1132c057f8..82cccb2bc1 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -45,6 +45,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # tag=v3.25.15 + uses: github/codeql-action/upload-sarif@4dd16135b69a43b6c8efb853346f8437d92d3c93 # tag=v3.26.6 with: sarif_file: results.sarif From b2bb0d458bddc4c6eb6516ad18a178e764cf11d5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 23:07:53 +0000 Subject: [PATCH 61/83] Bump prometheus/promci from 0.1.0 to 0.3.0 Bumps [prometheus/promci](https://github.com/prometheus/promci) from 0.1.0 to 0.3.0. - [Release notes](https://github.com/prometheus/promci/releases) - [Commits](https://github.com/prometheus/promci/compare/3cb0c3871f223bd5ce1226995bd52ffb314798b6...45166329da36d74895901808f1c8c97efafc7f84) --- updated-dependencies: - dependency-name: prometheus/promci dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/ci.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c89c9507bb..d314ac50ad 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: image: quay.io/prometheus/golang-builder:1.22-base steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 + - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 - uses: ./.github/promci/actions/setup_environment - run: make GOOPTS=--tags=stringlabels GO_ONLY=1 SKIP_GOLANGCI_LINT=1 - run: go test --tags=stringlabels ./tsdb/ -test.tsdb-isolation=false @@ -28,7 +28,7 @@ jobs: image: quay.io/prometheus/golang-builder:1.22-base steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 + - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 - uses: ./.github/promci/actions/setup_environment - run: go test --tags=dedupelabels ./... - run: GOARCH=386 go test ./cmd/prometheus @@ -58,7 +58,7 @@ jobs: steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 + - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 - uses: ./.github/promci/actions/setup_environment with: enable_go: false @@ -115,7 +115,7 @@ jobs: thread: [ 0, 1, 2 ] steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 + - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 - uses: ./.github/promci/actions/build with: promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386" @@ -138,7 +138,7 @@ jobs: # should also be updated. steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 + - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 - uses: ./.github/promci/actions/build with: parallelism: 12 @@ -200,7 +200,7 @@ jobs: if: github.event_name == 'push' && github.event.ref == 'refs/heads/main' steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 + - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 - uses: ./.github/promci/actions/publish_main with: docker_hub_login: ${{ secrets.docker_hub_login }} @@ -214,7 +214,7 @@ jobs: if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.') steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 + - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 - uses: ./.github/promci/actions/publish_release with: docker_hub_login: ${{ secrets.docker_hub_login }} @@ -229,7 +229,7 @@ jobs: steps: - name: Checkout uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 + - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 - name: Install nodejs uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2 with: From 8789a124bccb658c39c10059ea10cc5e892e9d3f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 23:07:57 +0000 Subject: [PATCH 62/83] Bump bufbuild/buf-setup-action from 1.35.1 to 1.39.0 Bumps [bufbuild/buf-setup-action](https://github.com/bufbuild/buf-setup-action) from 1.35.1 to 1.39.0. - [Release notes](https://github.com/bufbuild/buf-setup-action/releases) - [Commits](https://github.com/bufbuild/buf-setup-action/compare/aceb106d2419c4cff48863df90161d92decb8591...54abbed4fe8d8d45173eca4798b0c39a53a7b658) --- updated-dependencies: - dependency-name: bufbuild/buf-setup-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/buf-lint.yml | 2 +- .github/workflows/buf.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/buf-lint.yml b/.github/workflows/buf-lint.yml index 9f60a2336d..3f6cf76e16 100644 --- a/.github/workflows/buf-lint.yml +++ b/.github/workflows/buf-lint.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: bufbuild/buf-setup-action@aceb106d2419c4cff48863df90161d92decb8591 # v1.35.1 + - uses: bufbuild/buf-setup-action@54abbed4fe8d8d45173eca4798b0c39a53a7b658 # v1.39.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} - uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1 diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml index 1856fb95e7..632d38cb00 100644 --- a/.github/workflows/buf.yml +++ b/.github/workflows/buf.yml @@ -13,7 +13,7 @@ jobs: if: github.repository_owner == 'prometheus' steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: bufbuild/buf-setup-action@aceb106d2419c4cff48863df90161d92decb8591 # v1.35.1 + - uses: bufbuild/buf-setup-action@54abbed4fe8d8d45173eca4798b0c39a53a7b658 # v1.39.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} - uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1 From 8dd49103ba6081fb1d5a8be8f3aa0062ac474552 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 23:15:51 +0000 Subject: [PATCH 63/83] Bump github.com/prometheus/common Bumps [github.com/prometheus/common](https://github.com/prometheus/common) from 0.55.0 to 0.57.0. - [Release notes](https://github.com/prometheus/common/releases) - [Changelog](https://github.com/prometheus/common/blob/main/RELEASE.md) - [Commits](https://github.com/prometheus/common/compare/v0.55.0...v0.57.0) --- updated-dependencies: - dependency-name: github.com/prometheus/common dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- documentation/examples/remote_storage/go.mod | 12 ++++----- documentation/examples/remote_storage/go.sum | 28 ++++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/documentation/examples/remote_storage/go.mod b/documentation/examples/remote_storage/go.mod index bab39303d7..e5e052469b 100644 --- a/documentation/examples/remote_storage/go.mod +++ b/documentation/examples/remote_storage/go.mod @@ -8,8 +8,8 @@ require ( github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 github.com/influxdata/influxdb v1.11.5 - github.com/prometheus/client_golang v1.19.1 - github.com/prometheus/common v0.55.0 + github.com/prometheus/client_golang v1.20.0 + github.com/prometheus/common v0.57.0 github.com/prometheus/prometheus v0.53.1 github.com/stretchr/testify v1.9.0 ) @@ -35,7 +35,7 @@ require ( github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.17.8 // indirect + github.com/klauspost/compress v1.17.9 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect @@ -55,10 +55,10 @@ require ( go.opentelemetry.io/otel/trace v1.27.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.24.0 // indirect - golang.org/x/net v0.26.0 // indirect + golang.org/x/crypto v0.25.0 // indirect + golang.org/x/net v0.27.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect - golang.org/x/sys v0.21.0 // indirect + golang.org/x/sys v0.22.0 // indirect golang.org/x/text v0.16.0 // indirect golang.org/x/time v0.5.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect diff --git a/documentation/examples/remote_storage/go.sum b/documentation/examples/remote_storage/go.sum index 6e283cc749..34c474ef89 100644 --- a/documentation/examples/remote_storage/go.sum +++ b/documentation/examples/remote_storage/go.sum @@ -187,8 +187,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= -github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b h1:udzkj9S/zlT5X367kqJis0QP7YMxobob6zhzq6Yre00= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b/go.mod h1:pcaDhQK0/NJZEvtCO0qQPPropqV0sJOJ6YW7X+9kRwM= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -253,8 +253,8 @@ github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXP github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= -github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI= +github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -264,8 +264,8 @@ github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y8 github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= -github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/common v0.57.0 h1:Ro/rKjwdq9mZn1K5QPctzh+MA4Lp0BuYk5ZZEVhoNcY= +github.com/prometheus/common v0.57.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= @@ -323,8 +323,8 @@ golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnf golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= -golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= +golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= +golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= golang.org/x/exp v0.0.0-20240119083558-1b970713d09a h1:Q8/wZp0KX97QFTc2ywcOE0YRjZPVIx+MXInMzdvQqcA= golang.org/x/exp v0.0.0-20240119083558-1b970713d09a/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -344,8 +344,8 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= -golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= +golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= @@ -373,11 +373,11 @@ golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= -golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= +golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= -golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= +golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= +golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= From 40e6188b6d085eb3a3b76105390a8b4e7aa03de4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 23:48:48 +0000 Subject: [PATCH 64/83] Bump github.com/envoyproxy/protoc-gen-validate from 1.0.4 to 1.1.0 Bumps [github.com/envoyproxy/protoc-gen-validate](https://github.com/envoyproxy/protoc-gen-validate) from 1.0.4 to 1.1.0. - [Release notes](https://github.com/envoyproxy/protoc-gen-validate/releases) - [Changelog](https://github.com/bufbuild/protoc-gen-validate/blob/main/.goreleaser.yaml) - [Commits](https://github.com/envoyproxy/protoc-gen-validate/compare/v1.0.4...v1.1.0) --- updated-dependencies: - dependency-name: github.com/envoyproxy/protoc-gen-validate dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index af327c64ad..a840a1b6ac 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/docker/docker v27.1.1+incompatible github.com/edsrzf/mmap-go v1.1.0 github.com/envoyproxy/go-control-plane v0.12.0 - github.com/envoyproxy/protoc-gen-validate v1.0.4 + github.com/envoyproxy/protoc-gen-validate v1.1.0 github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/fsnotify/fsnotify v1.7.0 github.com/go-kit/log v0.2.1 diff --git a/go.sum b/go.sum index 933ef94201..97dcfe0908 100644 --- a/go.sum +++ b/go.sum @@ -171,8 +171,8 @@ github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1m github.com/envoyproxy/go-control-plane v0.12.0 h1:4X+VP1GHd1Mhj6IB5mMeGbLCleqxjletLK6K0rbxyZI= github.com/envoyproxy/go-control-plane v0.12.0/go.mod h1:ZBTaoJ23lqITozF0M6G4/IragXCQKCnYbmlmtHvwRG0= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/envoyproxy/protoc-gen-validate v1.0.4 h1:gVPz/FMfvh57HdSJQyvBtF00j8JU4zdyUgIUNhlgg0A= -github.com/envoyproxy/protoc-gen-validate v1.0.4/go.mod h1:qys6tmnRsYrQqIhm2bvKZH4Blx/1gTIZ2UKVY1M+Yew= +github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM= +github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4= github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb h1:IT4JYU7k4ikYg1SCxNI1/Tieq/NFvh6dzLdgi7eu0tM= From 6290287c1ccdb2935965383895c1683aacb513b4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 23:49:28 +0000 Subject: [PATCH 65/83] Bump github.com/prometheus/client_golang from 1.20.0 to 1.20.2 Bumps [github.com/prometheus/client_golang](https://github.com/prometheus/client_golang) from 1.20.0 to 1.20.2. - [Release notes](https://github.com/prometheus/client_golang/releases) - [Changelog](https://github.com/prometheus/client_golang/blob/main/CHANGELOG.md) - [Commits](https://github.com/prometheus/client_golang/compare/v1.20.0...v1.20.2) --- updated-dependencies: - dependency-name: github.com/prometheus/client_golang dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index af327c64ad..cb792413a7 100644 --- a/go.mod +++ b/go.mod @@ -52,7 +52,7 @@ require ( github.com/oklog/ulid v1.3.1 github.com/ovh/go-ovh v1.6.0 github.com/prometheus/alertmanager v0.27.0 - github.com/prometheus/client_golang v1.20.0 + github.com/prometheus/client_golang v1.20.2 github.com/prometheus/client_model v0.6.1 github.com/prometheus/common v0.56.0 github.com/prometheus/common/assets v0.2.0 diff --git a/go.sum b/go.sum index 933ef94201..058cf3253d 100644 --- a/go.sum +++ b/go.sum @@ -608,8 +608,8 @@ github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeD github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI= -github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.2 h1:5ctymQzZlyOON1666svgwn3s6IKWgfbjsejTMiXIyjg= +github.com/prometheus/client_golang v1.20.2/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= From c09f6f80c6b768b39f6851c711a68c7a23773caa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 23:49:32 +0000 Subject: [PATCH 66/83] Bump golang.org/x/oauth2 from 0.21.0 to 0.22.0 Bumps [golang.org/x/oauth2](https://github.com/golang/oauth2) from 0.21.0 to 0.22.0. - [Commits](https://github.com/golang/oauth2/compare/v0.21.0...v0.22.0) --- updated-dependencies: - dependency-name: golang.org/x/oauth2 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index af327c64ad..7a3844a0b9 100644 --- a/go.mod +++ b/go.mod @@ -75,7 +75,7 @@ require ( go.uber.org/automaxprocs v1.5.3 go.uber.org/goleak v1.3.0 go.uber.org/multierr v1.11.0 - golang.org/x/oauth2 v0.21.0 + golang.org/x/oauth2 v0.22.0 golang.org/x/sync v0.7.0 golang.org/x/sys v0.22.0 golang.org/x/text v0.16.0 diff --git a/go.sum b/go.sum index 933ef94201..2f77c00a0d 100644 --- a/go.sum +++ b/go.sum @@ -865,8 +865,8 @@ golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4Iltr golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= -golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= +golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= From fae66d0bba182f6e3745a8ff04237d59fb9a575d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 23:49:48 +0000 Subject: [PATCH 67/83] Bump github.com/hashicorp/consul/api from 1.29.2 to 1.29.4 Bumps [github.com/hashicorp/consul/api](https://github.com/hashicorp/consul) from 1.29.2 to 1.29.4. - [Release notes](https://github.com/hashicorp/consul/releases) - [Changelog](https://github.com/hashicorp/consul/blob/main/CHANGELOG.md) - [Commits](https://github.com/hashicorp/consul/compare/api/v1.29.2...api/v1.29.4) --- updated-dependencies: - dependency-name: github.com/hashicorp/consul/api dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index af327c64ad..2f5524dd4e 100644 --- a/go.mod +++ b/go.mod @@ -36,7 +36,7 @@ require ( github.com/gophercloud/gophercloud v1.14.0 github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc github.com/grpc-ecosystem/grpc-gateway v1.16.0 - github.com/hashicorp/consul/api v1.29.2 + github.com/hashicorp/consul/api v1.29.4 github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3 github.com/hetznercloud/hcloud-go/v2 v2.12.0 github.com/ionos-cloud/sdk-go/v6 v6.2.0 diff --git a/go.sum b/go.sum index 933ef94201..20f88c1023 100644 --- a/go.sum +++ b/go.sum @@ -353,8 +353,8 @@ github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFb github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= -github.com/hashicorp/consul/api v1.29.2 h1:aYyRn8EdE2mSfG14S1+L9Qkjtz8RzmaWh6AcNGRNwPw= -github.com/hashicorp/consul/api v1.29.2/go.mod h1:0YObcaLNDSbtlgzIRtmRXI1ZkeuK0trCBxwZQ4MYnIk= +github.com/hashicorp/consul/api v1.29.4 h1:P6slzxDLBOxUSj3fWo2o65VuKtbtOXFi7TSSgtXutuE= +github.com/hashicorp/consul/api v1.29.4/go.mod h1:HUlfw+l2Zy68ceJavv2zAyArl2fqhGWnMycyt56sBgg= github.com/hashicorp/consul/proto-public v0.6.2 h1:+DA/3g/IiKlJZb88NBn0ZgXrxJp2NlvCZdEyl+qxvL0= github.com/hashicorp/consul/proto-public v0.6.2/go.mod h1:cXXbOg74KBNGajC+o8RlA502Esf0R9prcoJgiOX/2Tg= github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= From 02d1cabd0a182dd6c98fd522cdfd1aee885ffa04 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Sep 2024 23:50:13 +0000 Subject: [PATCH 68/83] Bump github.com/hetznercloud/hcloud-go/v2 from 2.12.0 to 2.13.1 Bumps [github.com/hetznercloud/hcloud-go/v2](https://github.com/hetznercloud/hcloud-go) from 2.12.0 to 2.13.1. - [Release notes](https://github.com/hetznercloud/hcloud-go/releases) - [Changelog](https://github.com/hetznercloud/hcloud-go/blob/main/CHANGELOG.md) - [Commits](https://github.com/hetznercloud/hcloud-go/compare/v2.12.0...v2.13.1) --- updated-dependencies: - dependency-name: github.com/hetznercloud/hcloud-go/v2 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index af327c64ad..86e17fec3b 100644 --- a/go.mod +++ b/go.mod @@ -38,7 +38,7 @@ require ( github.com/grpc-ecosystem/grpc-gateway v1.16.0 github.com/hashicorp/consul/api v1.29.2 github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3 - github.com/hetznercloud/hcloud-go/v2 v2.12.0 + github.com/hetznercloud/hcloud-go/v2 v2.13.1 github.com/ionos-cloud/sdk-go/v6 v6.2.0 github.com/json-iterator/go v1.1.12 github.com/klauspost/compress v1.17.9 diff --git a/go.sum b/go.sum index 933ef94201..1195580280 100644 --- a/go.sum +++ b/go.sum @@ -414,8 +414,8 @@ github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3/go.mod h1:svtx github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY= github.com/hashicorp/serf v0.10.1/go.mod h1:yL2t6BqATOLGc5HF7qbFkTfXoPIY0WZdWHfEvMqbG+4= -github.com/hetznercloud/hcloud-go/v2 v2.12.0 h1:nOgfNTo0gyXZJJdM8mo/XH5MO/e80wAEpldRzdWayhY= -github.com/hetznercloud/hcloud-go/v2 v2.12.0/go.mod h1:dhix40Br3fDiBhwaSG/zgaYOFFddpfBm/6R1Zz0IiF0= +github.com/hetznercloud/hcloud-go/v2 v2.13.1 h1:jq0GP4QaYE5d8xR/Zw17s9qoaESRJMXfGmtD1a/qckQ= +github.com/hetznercloud/hcloud-go/v2 v2.13.1/go.mod h1:dhix40Br3fDiBhwaSG/zgaYOFFddpfBm/6R1Zz0IiF0= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= From 956245b25bdd99c423c44e7e491fe0db69036c7f Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Mon, 2 Sep 2024 11:27:39 +0200 Subject: [PATCH 69/83] promqltest: adjust eval times and range selector In order to fix new tests for changes added in https://github.com/prometheus/prometheus/pull/13904. Signed-off-by: Jan Fajerski --- .../testdata/name_label_dropping.test | 38 +++++++++---------- .../testdata/native_histograms.test | 4 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/promql/promqltest/testdata/name_label_dropping.test b/promql/promqltest/testdata/name_label_dropping.test index 1f1dac3602..5f5dcd5e4d 100644 --- a/promql/promqltest/testdata/name_label_dropping.test +++ b/promql/promqltest/testdata/name_label_dropping.test @@ -4,81 +4,81 @@ load 5m another_metric{env="1"} 60 120 180 # Does not drop __name__ for vector selector -eval instant at 15m metric{env="1"} +eval instant at 10m metric{env="1"} metric{env="1"} 120 # Drops __name__ for unary operators -eval instant at 15m -metric +eval instant at 10m -metric {env="1"} -120 # Drops __name__ for binary operators -eval instant at 15m metric + another_metric +eval instant at 10m metric + another_metric {env="1"} 300 # Does not drop __name__ for binary comparison operators -eval instant at 15m metric <= another_metric +eval instant at 10m metric <= another_metric metric{env="1"} 120 # Drops __name__ for binary comparison operators with "bool" modifier -eval instant at 15m metric <= bool another_metric +eval instant at 10m metric <= bool another_metric {env="1"} 1 # Drops __name__ for vector-scalar operations -eval instant at 15m metric * 2 +eval instant at 10m metric * 2 {env="1"} 240 # Drops __name__ for instant-vector functions -eval instant at 15m clamp(metric, 0, 100) +eval instant at 10m clamp(metric, 0, 100) {env="1"} 100 # Drops __name__ for range-vector functions -eval instant at 15m rate(metric{env="1"}[10m]) +eval instant at 10m rate(metric{env="1"}[10m]) {env="1"} 0.2 # Does not drop __name__ for last_over_time function -eval instant at 15m last_over_time(metric{env="1"}[10m]) +eval instant at 10m last_over_time(metric{env="1"}[10m]) metric{env="1"} 120 # Drops name for other _over_time functions -eval instant at 15m max_over_time(metric{env="1"}[10m]) +eval instant at 10m max_over_time(metric{env="1"}[10m]) {env="1"} 120 # Allows relabeling (to-be-dropped) __name__ via label_replace -eval instant at 15m label_replace(rate({env="1"}[10m]), "my_name", "rate_$1", "__name__", "(.+)") +eval instant at 10m label_replace(rate({env="1"}[10m]), "my_name", "rate_$1", "__name__", "(.+)") {my_name="rate_metric", env="1"} 0.2 {my_name="rate_another_metric", env="1"} 0.2 # Allows preserving __name__ via label_replace -eval instant at 15m label_replace(rate({env="1"}[10m]), "__name__", "rate_$1", "__name__", "(.+)") +eval instant at 10m label_replace(rate({env="1"}[10m]), "__name__", "rate_$1", "__name__", "(.+)") rate_metric{env="1"} 0.2 rate_another_metric{env="1"} 0.2 # Allows relabeling (to-be-dropped) __name__ via label_join -eval instant at 15m label_join(rate({env="1"}[10m]), "my_name", "_", "__name__") +eval instant at 10m label_join(rate({env="1"}[10m]), "my_name", "_", "__name__") {my_name="metric", env="1"} 0.2 {my_name="another_metric", env="1"} 0.2 # Allows preserving __name__ via label_join -eval instant at 15m label_join(rate({env="1"}[10m]), "__name__", "_", "__name__", "env") +eval instant at 10m label_join(rate({env="1"}[10m]), "__name__", "_", "__name__", "env") metric_1{env="1"} 0.2 another_metric_1{env="1"} 0.2 # Does not drop metric names fro aggregation operators -eval instant at 15m sum by (__name__, env) (metric{env="1"}) +eval instant at 10m sum by (__name__, env) (metric{env="1"}) metric{env="1"} 120 # Aggregation operators by __name__ lead to duplicate labelset errors (aggregation is partitioned by not yet removed __name__ label) # This is an accidental side effect of delayed __name__ label dropping -eval_fail instant at 15m sum by (__name__) (rate({env="1"}[10m])) +eval_fail instant at 10m sum by (__name__) (rate({env="1"}[10m])) # Aggregation operators aggregate metrics with same labelset and to-be-dropped names # This is an accidental side effect of delayed __name__ label dropping -eval instant at 15m sum(rate({env="1"}[10m])) by (env) +eval instant at 10m sum(rate({env="1"}[10m])) by (env) {env="1"} 0.4 # Aggregationk operators propagate __name__ label dropping information -eval instant at 15m topk(10, sum by (__name__, env) (metric{env="1"})) +eval instant at 10m topk(10, sum by (__name__, env) (metric{env="1"})) metric{env="1"} 120 -eval instant at 15m topk(10, sum by (__name__, env) (rate(metric{env="1"}[10m]))) +eval instant at 10m topk(10, sum by (__name__, env) (rate(metric{env="1"}[10m]))) {env="1"} 0.2 diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index ffbcc7773e..549781e8c5 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -995,8 +995,8 @@ clear load 1m histogram_sum_over_time {{schema:0 count:25 sum:1234.5 z_bucket:4 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[2 4 0 0 1 9]}} {{schema:0 count:41 sum:2345.6 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}} {{schema:0 count:41 sum:1111.1 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}} {{schema:1 count:0}} -eval instant at 3m sum_over_time(histogram_sum_over_time[3m:1m]) +eval instant at 3m sum_over_time(histogram_sum_over_time[4m:1m]) {} {{schema:0 count:107 sum:4691.2 z_bucket:14 z_bucket_w:0.001 buckets:[3 8 2 5 3 2 2] n_buckets:[2 6 8 4 15 9 0 0 0 10 10 4]}} -eval instant at 3m avg_over_time(histogram_sum_over_time[3m:1m]) +eval instant at 3m avg_over_time(histogram_sum_over_time[4m:1m]) {} {{schema:0 count:26.75 sum:1172.8 z_bucket:3.5 z_bucket_w:0.001 buckets:[0.75 2 0.5 1.25 0.75 0.5 0.5] n_buckets:[0.5 1.5 2 1 3.75 2.25 0 0 0 2.5 2.5 1]}} From 6c08f8f5597c781215e1c561c941a32b5100e7d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:06:35 +0000 Subject: [PATCH 70/83] Bump github.com/ionos-cloud/sdk-go/v6 from 6.2.0 to 6.2.1 Bumps [github.com/ionos-cloud/sdk-go/v6](https://github.com/ionos-cloud/sdk-go) from 6.2.0 to 6.2.1. - [Release notes](https://github.com/ionos-cloud/sdk-go/releases) - [Changelog](https://github.com/ionos-cloud/sdk-go/blob/master/docs/CHANGELOG.md) - [Commits](https://github.com/ionos-cloud/sdk-go/compare/v6.2.0...v6.2.1) --- updated-dependencies: - dependency-name: github.com/ionos-cloud/sdk-go/v6 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index ecb6efefdb..79313967fc 100644 --- a/go.mod +++ b/go.mod @@ -39,7 +39,7 @@ require ( github.com/hashicorp/consul/api v1.29.4 github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3 github.com/hetznercloud/hcloud-go/v2 v2.13.1 - github.com/ionos-cloud/sdk-go/v6 v6.2.0 + github.com/ionos-cloud/sdk-go/v6 v6.2.1 github.com/json-iterator/go v1.1.12 github.com/klauspost/compress v1.17.9 github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b diff --git a/go.sum b/go.sum index e898588f1b..f1659d3cd0 100644 --- a/go.sum +++ b/go.sum @@ -423,8 +423,8 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= -github.com/ionos-cloud/sdk-go/v6 v6.2.0 h1:qX7gachC0wJSmFfVRnd+DHmz9AStvVraKcwQ/JokIB4= -github.com/ionos-cloud/sdk-go/v6 v6.2.0/go.mod h1:EzEgRIDxBELvfoa/uBN0kOQaqovLjUWEB7iW4/Q+t4k= +github.com/ionos-cloud/sdk-go/v6 v6.2.1 h1:mxxN+frNVmbFrmmFfXnBC3g2USYJrl6mc1LW2iNYbFY= +github.com/ionos-cloud/sdk-go/v6 v6.2.1/go.mod h1:SXrO9OGyWjd2rZhAhEpdYN6VUAODzzqRdqA9BCviQtI= github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww= github.com/jarcoal/httpmock v1.3.1/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= From e8ecc118167f8dc69d909e477a51f185a17fb587 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 2 Sep 2024 16:39:02 +0100 Subject: [PATCH 71/83] [TESTS] Fix up item numbers in TestProtobufParse If an error is thrown by the test, these numbers help you see which item is wrong. Signed-off-by: Bryan Boreham --- model/textparse/protobufparse_test.go | 50 +++++++++++++-------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index e323a6cc8f..534addfa9f 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -1995,15 +1995,15 @@ func TestProtobufParse(t *testing.T) { "__name__", "without_quantiles_sum", ), }, - { // 78 + { // 81 m: "empty_histogram", help: "A histogram without observations and with a zero threshold of zero but with a no-op span to identify it as a native histogram.", }, - { // 79 + { // 82 m: "empty_histogram", typ: model.MetricTypeHistogram, }, - { // 80 + { // 83 m: "empty_histogram", shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, @@ -2014,15 +2014,15 @@ func TestProtobufParse(t *testing.T) { "__name__", "empty_histogram", ), }, - { // 81 + { // 84 m: "test_counter_with_createdtimestamp", help: "A counter with a created timestamp.", }, - { // 82 + { // 85 m: "test_counter_with_createdtimestamp", typ: model.MetricTypeCounter, }, - { // 83 + { // 86 m: "test_counter_with_createdtimestamp", v: 42, ct: 1000, @@ -2030,15 +2030,15 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_counter_with_createdtimestamp", ), }, - { // 84 + { // 87 m: "test_summary_with_createdtimestamp", help: "A summary with a created timestamp.", }, - { // 85 + { // 88 m: "test_summary_with_createdtimestamp", typ: model.MetricTypeSummary, }, - { // 86 + { // 89 m: "test_summary_with_createdtimestamp_count", v: 42, ct: 1000, @@ -2046,7 +2046,7 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_summary_with_createdtimestamp_count", ), }, - { // 87 + { // 90 m: "test_summary_with_createdtimestamp_sum", v: 1.234, ct: 1000, @@ -2054,15 +2054,15 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_summary_with_createdtimestamp_sum", ), }, - { // 88 + { // 91 m: "test_histogram_with_createdtimestamp", help: "A histogram with a created timestamp.", }, - { // 89 + { // 92 m: "test_histogram_with_createdtimestamp", typ: model.MetricTypeHistogram, }, - { // 90 + { // 93 m: "test_histogram_with_createdtimestamp", ct: 1000, shs: &histogram.Histogram{ @@ -2074,15 +2074,15 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_histogram_with_createdtimestamp", ), }, - { // 91 + { // 94 m: "test_gaugehistogram_with_createdtimestamp", help: "A gauge histogram with a created timestamp.", }, - { // 92 + { // 95 m: "test_gaugehistogram_with_createdtimestamp", typ: model.MetricTypeGaugeHistogram, }, - { // 93 + { // 96 m: "test_gaugehistogram_with_createdtimestamp", ct: 1000, shs: &histogram.Histogram{ @@ -2094,15 +2094,15 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_gaugehistogram_with_createdtimestamp", ), }, - { // 94 + { // 97 m: "test_histogram_with_native_histogram_exemplars", help: "A histogram with native histogram exemplars.", }, - { // 95 + { // 98 m: "test_histogram_with_native_histogram_exemplars", typ: model.MetricTypeHistogram, }, - { // 96 + { // 99 m: "test_histogram_with_native_histogram_exemplars", t: 1234568, shs: &histogram.Histogram{ @@ -2130,7 +2130,7 @@ func TestProtobufParse(t *testing.T) { {Labels: labels.FromStrings("dummyID", "59772"), Value: -0.00052, HasTs: true, Ts: 1625851160156}, }, }, - { // 97 + { // 100 m: "test_histogram_with_native_histogram_exemplars_count", t: 1234568, v: 175, @@ -2138,7 +2138,7 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_histogram_with_native_histogram_exemplars_count", ), }, - { // 98 + { // 101 m: "test_histogram_with_native_histogram_exemplars_sum", t: 1234568, v: 0.0008280461746287094, @@ -2146,7 +2146,7 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_histogram_with_native_histogram_exemplars_sum", ), }, - { // 99 + { // 102 m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff-0.0004899999999999998", t: 1234568, v: 2, @@ -2155,7 +2155,7 @@ func TestProtobufParse(t *testing.T) { "le", "-0.0004899999999999998", ), }, - { // 100 + { // 103 m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff-0.0003899999999999998", t: 1234568, v: 4, @@ -2167,7 +2167,7 @@ func TestProtobufParse(t *testing.T) { {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 101 + { // 104 m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff-0.0002899999999999998", t: 1234568, v: 16, @@ -2179,7 +2179,7 @@ func TestProtobufParse(t *testing.T) { {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 102 + { // 105 m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff+Inf", t: 1234568, v: 175, From 3aaf2c3c9d82c24a0ab8cacd5c68a67d995c312b Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 2 Sep 2024 16:39:45 +0100 Subject: [PATCH 72/83] [TESTS] Add second native histogram with exemplars in TestProtobufParse This test fails. Signed-off-by: Bryan Boreham --- model/textparse/protobufparse_test.go | 186 ++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index 534addfa9f..cf34ae52df 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -695,6 +695,70 @@ metric: < timestamp_ms: 1234568 > +`, + + `name: "test_histogram_with_native_histogram_exemplars2" +help: "Another histogram with native histogram exemplars." +type: HISTOGRAM +metric: < + histogram: < + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + > + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + exemplars: < + label: < + name: "dummyID" + value: "59780" + > + value: -0.00039 + timestamp: < + seconds: 1625851155 + nanos: 146848499 + > + > + > + timestamp_ms: 1234568 +> + `, } @@ -1276,6 +1340,41 @@ func TestProtobufParse(t *testing.T) { {Labels: labels.FromStrings("dummyID", "59772"), Value: -0.00052, HasTs: true, Ts: 1625851160156}, }, }, + { + m: "test_histogram_with_native_histogram_exemplars2", + help: "Another histogram with native histogram exemplars.", + }, + { + m: "test_histogram_with_native_histogram_exemplars2", + typ: model.MetricTypeHistogram, + }, + { + m: "test_histogram_with_native_histogram_exemplars2", + t: 1234568, + shs: &histogram.Histogram{ + Count: 175, + ZeroCount: 2, + Sum: 0.0008280461746287094, + ZeroThreshold: 2.938735877055719e-39, + Schema: 3, + PositiveSpans: []histogram.Span{ + {Offset: -161, Length: 1}, + {Offset: 8, Length: 3}, + }, + NegativeSpans: []histogram.Span{ + {Offset: -162, Length: 1}, + {Offset: 23, Length: 4}, + }, + PositiveBuckets: []int64{1, 2, -1, -1}, + NegativeBuckets: []int64{1, 3, -2, -1, 1}, + }, + lset: labels.FromStrings( + "__name__", "test_histogram_with_native_histogram_exemplars2", + ), + e: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, + }, + }, }, }, { @@ -2188,6 +2287,93 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, + { // 106 + m: "test_histogram_with_native_histogram_exemplars2", + help: "Another histogram with native histogram exemplars.", + }, + { // 107 + m: "test_histogram_with_native_histogram_exemplars2", + typ: model.MetricTypeHistogram, + }, + { // 108 + m: "test_histogram_with_native_histogram_exemplars2", + t: 1234568, + shs: &histogram.Histogram{ + Count: 175, + ZeroCount: 2, + Sum: 0.0008280461746287094, + ZeroThreshold: 2.938735877055719e-39, + Schema: 3, + PositiveSpans: []histogram.Span{ + {Offset: -161, Length: 1}, + {Offset: 8, Length: 3}, + }, + NegativeSpans: []histogram.Span{ + {Offset: -162, Length: 1}, + {Offset: 23, Length: 4}, + }, + PositiveBuckets: []int64{1, 2, -1, -1}, + NegativeBuckets: []int64{1, 3, -2, -1, 1}, + }, + lset: labels.FromStrings( + "__name__", "test_histogram_with_native_histogram_exemplars2", + ), + e: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, + }, + }, + { // 109 + m: "test_histogram_with_native_histogram_exemplars2_count", + t: 1234568, + v: 175, + lset: labels.FromStrings( + "__name__", "test_histogram_with_native_histogram_exemplars2_count", + ), + }, + { // 110 + m: "test_histogram_with_native_histogram_exemplars2_sum", + t: 1234568, + v: 0.0008280461746287094, + lset: labels.FromStrings( + "__name__", "test_histogram_with_native_histogram_exemplars2_sum", + ), + }, + { // 111 + m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff-0.0004899999999999998", + t: 1234568, + v: 2, + lset: labels.FromStrings( + "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", + "le", "-0.0004899999999999998", + ), + }, + { // 112 + m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff-0.0003899999999999998", + t: 1234568, + v: 4, + lset: labels.FromStrings( + "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", + "le", "-0.0003899999999999998", + ), + }, + { // 113 + m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff-0.0002899999999999998", + t: 1234568, + v: 16, + lset: labels.FromStrings( + "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", + "le", "-0.0002899999999999998", + ), + }, + { // 114 + m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff+Inf", + t: 1234568, + v: 175, + lset: labels.FromStrings( + "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", + "le", "+Inf", + ), + }, }, }, } From d599c4b28c406c95051de5e4905e17203fca0d70 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 2 Sep 2024 16:42:48 +0100 Subject: [PATCH 73/83] [BUGFIX] Protobuf scraping: reset exemplar position Signed-off-by: Bryan Boreham --- model/textparse/protobufparse.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index ea3a2e1a34..d7cbd3519a 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -409,6 +409,7 @@ func (p *ProtobufParser) Next() (Entry, error) { switch p.state { case EntryInvalid: p.metricPos = 0 + p.exemplarPos = 0 p.fieldPos = -2 n, err := readDelimited(p.in[p.inPos:], p.mf) p.inPos += n @@ -485,6 +486,7 @@ func (p *ProtobufParser) Next() (Entry, error) { p.metricPos++ p.fieldPos = -2 p.fieldsDone = false + p.exemplarPos = 0 // If this is a metric family containing native // histograms, we have to switch back to native // histograms after parsing a classic histogram. From ce7d830f1f99f3b00c9d6d50ae8e816a1a542d14 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Mon, 2 Sep 2024 18:20:10 +0200 Subject: [PATCH 74/83] Bring back BenchmarkLoadRealWLs (#14757) This was part of #14525 which was reverted. I still think that having this benchmark committed in to the repo is useful. Signed-off-by: Oleg Zaytsev --- tsdb/head_test.go | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tsdb/head_test.go b/tsdb/head_test.go index a7a8847e80..b32b37fcad 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -438,6 +438,32 @@ func BenchmarkLoadWLs(b *testing.B) { } } +// BenchmarkLoadRealWLs will be skipped unless the BENCHMARK_LOAD_REAL_WLS_DIR environment variable is set. +// BENCHMARK_LOAD_REAL_WLS_DIR should be the folder where `wal` and `chunks_head` are located. +func BenchmarkLoadRealWLs(b *testing.B) { + dir := os.Getenv("BENCHMARK_LOAD_REAL_WLS_DIR") + if dir == "" { + b.SkipNow() + } + + wal, err := wlog.New(nil, nil, filepath.Join(dir, "wal"), wlog.CompressionNone) + require.NoError(b, err) + b.Cleanup(func() { wal.Close() }) + + wbl, err := wlog.New(nil, nil, filepath.Join(dir, "wbl"), wlog.CompressionNone) + require.NoError(b, err) + b.Cleanup(func() { wbl.Close() }) + + // Load the WAL. + for i := 0; i < b.N; i++ { + opts := DefaultHeadOptions() + opts.ChunkDirRoot = dir + h, err := NewHead(nil, nil, wal, wbl, opts, nil) + require.NoError(b, err) + require.NoError(b, h.Init(0)) + } +} + // TestHead_HighConcurrencyReadAndWrite generates 1000 series with a step of 15s and fills a whole block with samples, // this means in total it generates 4000 chunks because with a step of 15s there are 4 chunks per block per series. // While appending the samples to the head it concurrently queries them from multiple go routines and verifies that the From 442f24e09927957936dd2a5b2c5c0817da0674cf Mon Sep 17 00:00:00 2001 From: Arthur Silva Sens Date: Mon, 2 Sep 2024 17:30:37 -0300 Subject: [PATCH 75/83] chore: Simplify TestHeadAppender_AppendCTZeroSample (#14812) Signed-off-by: Arthur Silva Sens --- tsdb/head_test.go | 81 +++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 45 deletions(-) diff --git a/tsdb/head_test.go b/tsdb/head_test.go index b32b37fcad..18ec4f0aca 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -33,7 +33,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" - "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "go.uber.org/atomic" "golang.org/x/sync/errgroup" @@ -5945,16 +5944,16 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { for _, tc := range []struct { name string appendableSamples []appendableSamples - expectedSamples []model.Sample + expectedSamples []chunks.Sample }{ { name: "In order ct+normal sample", appendableSamples: []appendableSamples{ {ts: 100, val: 10, ct: 1}, }, - expectedSamples: []model.Sample{ - {Timestamp: 1, Value: 0}, - {Timestamp: 100, Value: 10}, + expectedSamples: []chunks.Sample{ + sample{t: 1, f: 0}, + sample{t: 100, f: 10}, }, }, { @@ -5963,10 +5962,10 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { {ts: 100, val: 10, ct: 1}, {ts: 101, val: 10, ct: 1}, }, - expectedSamples: []model.Sample{ - {Timestamp: 1, Value: 0}, - {Timestamp: 100, Value: 10}, - {Timestamp: 101, Value: 10}, + expectedSamples: []chunks.Sample{ + sample{t: 1, f: 0}, + sample{t: 100, f: 10}, + sample{t: 101, f: 10}, }, }, { @@ -5975,11 +5974,11 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { {ts: 100, val: 10, ct: 1}, {ts: 102, val: 10, ct: 101}, }, - expectedSamples: []model.Sample{ - {Timestamp: 1, Value: 0}, - {Timestamp: 100, Value: 10}, - {Timestamp: 101, Value: 0}, - {Timestamp: 102, Value: 10}, + expectedSamples: []chunks.Sample{ + sample{t: 1, f: 0}, + sample{t: 100, f: 10}, + sample{t: 101, f: 0}, + sample{t: 102, f: 10}, }, }, { @@ -5988,41 +5987,33 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { {ts: 100, val: 10, ct: 1}, {ts: 101, val: 10, ct: 100}, }, - expectedSamples: []model.Sample{ - {Timestamp: 1, Value: 0}, - {Timestamp: 100, Value: 10}, - {Timestamp: 101, Value: 10}, + expectedSamples: []chunks.Sample{ + sample{t: 1, f: 0}, + sample{t: 100, f: 10}, + sample{t: 101, f: 10}, }, }, } { - h, _ := newTestHead(t, DefaultBlockDuration, wlog.CompressionNone, false) - defer func() { - require.NoError(t, h.Close()) - }() - a := h.Appender(context.Background()) - lbls := labels.FromStrings("foo", "bar") - for _, sample := range tc.appendableSamples { - _, err := a.AppendCTZeroSample(0, lbls, sample.ts, sample.ct) - require.NoError(t, err) - _, err = a.Append(0, lbls, sample.ts, sample.val) - require.NoError(t, err) - } - require.NoError(t, a.Commit()) + t.Run(tc.name, func(t *testing.T) { + h, _ := newTestHead(t, DefaultBlockDuration, wlog.CompressionNone, false) + defer func() { + require.NoError(t, h.Close()) + }() + a := h.Appender(context.Background()) + lbls := labels.FromStrings("foo", "bar") + for _, sample := range tc.appendableSamples { + _, err := a.AppendCTZeroSample(0, lbls, sample.ts, sample.ct) + require.NoError(t, err) + _, err = a.Append(0, lbls, sample.ts, sample.val) + require.NoError(t, err) + } + require.NoError(t, a.Commit()) - q, err := NewBlockQuerier(h, math.MinInt64, math.MaxInt64) - require.NoError(t, err) - ss := q.Select(context.Background(), false, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")) - require.True(t, ss.Next()) - s := ss.At() - require.False(t, ss.Next()) - it := s.Iterator(nil) - for _, sample := range tc.expectedSamples { - require.Equal(t, chunkenc.ValFloat, it.Next()) - timestamp, value := it.At() - require.Equal(t, sample.Timestamp, model.Time(timestamp)) - require.Equal(t, sample.Value, model.SampleValue(value)) - } - require.Equal(t, chunkenc.ValNone, it.Next()) + q, err := NewBlockQuerier(h, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + result := query(t, q, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")) + require.Equal(t, tc.expectedSamples, result[`{foo="bar"}`]) + }) } } From f1d3c3f46468e97f5bad0aa0004f9e5775eb3429 Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Tue, 3 Sep 2024 08:33:44 +0200 Subject: [PATCH 76/83] fix: Properly put OM text p.CreatedTimestamp behind feature flag (#14809) (#14815) Mitigates https://github.com/prometheus/prometheus/issues/14808 Signed-off-by: Manik Rana Co-authored-by: Manik Rana --- scrape/scrape.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scrape/scrape.go b/scrape/scrape.go index 5fcd623c0d..2abd4691d6 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -1655,12 +1655,14 @@ loop: if seriesAlreadyScraped && parsedTimestamp == nil { err = storage.ErrDuplicateSampleForTimestamp } else { - if ctMs := p.CreatedTimestamp(); sl.enableCTZeroIngestion && ctMs != nil { - ref, err = app.AppendCTZeroSample(ref, lset, t, *ctMs) - if err != nil && !errors.Is(err, storage.ErrOutOfOrderCT) { // OOO is a common case, ignoring completely for now. - // CT is an experimental feature. For now, we don't need to fail the - // scrape on errors updating the created timestamp, log debug. - level.Debug(sl.l).Log("msg", "Error when appending CT in scrape loop", "series", string(met), "ct", *ctMs, "t", t, "err", err) + if sl.enableCTZeroIngestion { + if ctMs := p.CreatedTimestamp(); ctMs != nil { + ref, err = app.AppendCTZeroSample(ref, lset, t, *ctMs) + if err != nil && !errors.Is(err, storage.ErrOutOfOrderCT) { // OOO is a common case, ignoring completely for now. + // CT is an experimental feature. For now, we don't need to fail the + // scrape on errors updating the created timestamp, log debug. + level.Debug(sl.l).Log("msg", "Error when appending CT in scrape loop", "series", string(met), "ct", *ctMs, "t", t, "err", err) + } } } From e896643403412eda013c2e11c68939290ccff53e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 11:14:58 +0000 Subject: [PATCH 77/83] Bump actions/setup-node from 4.0.2 to 4.0.3 Bumps [actions/setup-node](https://github.com/actions/setup-node) from 4.0.2 to 4.0.3. - [Release notes](https://github.com/actions/setup-node/releases) - [Commits](https://github.com/actions/setup-node/compare/60edb5dd545a775178f52524783378180af0d1f8...1e60f620b9541d16bece96c5465dc8ee9832be0b) --- updated-dependencies: - dependency-name: actions/setup-node dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d314ac50ad..5934a3dafe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -231,7 +231,7 @@ jobs: uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 - name: Install nodejs - uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2 + uses: actions/setup-node@1e60f620b9541d16bece96c5465dc8ee9832be0b # v4.0.3 with: node-version-file: "web/ui/.nvmrc" registry-url: "https://registry.npmjs.org" From 87420774980288cc4d9c59342bf9ff278db8c04c Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 2 Sep 2024 20:20:40 +0100 Subject: [PATCH 78/83] [BUGFIX] PromQL: pass Context so spans parent correctly Assigning to `evaluator.ctx` in `eval()` broke the parent-child relationship. Signed-off-by: Bryan Boreham --- promql/engine.go | 103 +++++++++++++++++++++----------------------- promql/functions.go | 9 ++-- 2 files changed, 53 insertions(+), 59 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index b54ce2d6dc..537eafb41e 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -713,7 +713,6 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval startTimestamp: start, endTimestamp: start, interval: 1, - ctx: ctxInnerEval, maxSamples: ng.maxSamplesPerQuery, logger: ng.logger, lookbackDelta: s.LookbackDelta, @@ -723,7 +722,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval } query.sampleStats.InitStepTracking(start, start, 1) - val, warnings, err := evaluator.Eval(s.Expr) + val, warnings, err := evaluator.Eval(ctxInnerEval, s.Expr) evalSpanTimer.Finish() @@ -772,7 +771,6 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval startTimestamp: timeMilliseconds(s.Start), endTimestamp: timeMilliseconds(s.End), interval: durationMilliseconds(s.Interval), - ctx: ctxInnerEval, maxSamples: ng.maxSamplesPerQuery, logger: ng.logger, lookbackDelta: s.LookbackDelta, @@ -781,7 +779,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval enableDelayedNameRemoval: ng.enableDelayedNameRemoval, } query.sampleStats.InitStepTracking(evaluator.startTimestamp, evaluator.endTimestamp, evaluator.interval) - val, warnings, err := evaluator.Eval(s.Expr) + val, warnings, err := evaluator.Eval(ctxInnerEval, s.Expr) evalSpanTimer.Finish() @@ -1029,8 +1027,6 @@ func (e errWithWarnings) Error() string { return e.err.Error() } // querier and reports errors. On timeout or cancellation of its context it // terminates. type evaluator struct { - ctx context.Context - startTimestamp int64 // Start time in milliseconds. endTimestamp int64 // End time in milliseconds. interval int64 // Interval in milliseconds. @@ -1079,10 +1075,10 @@ func (ev *evaluator) recover(expr parser.Expr, ws *annotations.Annotations, errp } } -func (ev *evaluator) Eval(expr parser.Expr) (v parser.Value, ws annotations.Annotations, err error) { +func (ev *evaluator) Eval(ctx context.Context, expr parser.Expr) (v parser.Value, ws annotations.Annotations, err error) { defer ev.recover(expr, &ws, &err) - v, ws = ev.eval(expr) + v, ws = ev.eval(ctx, expr) if ev.enableDelayedNameRemoval { ev.cleanupMetricLabels(v) } @@ -1133,7 +1129,7 @@ func (enh *EvalNodeHelper) resetBuilder(lbls labels.Labels) { // function call results. // The prepSeries function (if provided) can be used to prepare the helper // for each series, then passed to each call funcCall. -func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper), funcCall func([]parser.Value, [][]EvalSeriesHelper, *EvalNodeHelper) (Vector, annotations.Annotations), exprs ...parser.Expr) (Matrix, annotations.Annotations) { +func (ev *evaluator) rangeEval(ctx context.Context, prepSeries func(labels.Labels, *EvalSeriesHelper), funcCall func([]parser.Value, [][]EvalSeriesHelper, *EvalNodeHelper) (Vector, annotations.Annotations), exprs ...parser.Expr) (Matrix, annotations.Annotations) { numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 matrixes := make([]Matrix, len(exprs)) origMatrixes := make([]Matrix, len(exprs)) @@ -1144,7 +1140,7 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper) // Functions will take string arguments from the expressions, not the values. if e != nil && e.Type() != parser.ValueTypeString { // ev.currentSamples will be updated to the correct value within the ev.eval call. - val, ws := ev.eval(e) + val, ws := ev.eval(ctx, e) warnings.Merge(ws) matrixes[i] = val.(Matrix) @@ -1196,7 +1192,7 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper) } for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval { - if err := contextDone(ev.ctx, "expression evaluation"); err != nil { + if err := contextDone(ctx, "expression evaluation"); err != nil { ev.error(err) } // Reset number of samples in memory after each timestamp. @@ -1306,7 +1302,7 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper) return mat, warnings } -func (ev *evaluator) rangeEvalAgg(aggExpr *parser.AggregateExpr, sortedGrouping []string, inputMatrix Matrix, param float64) (Matrix, annotations.Annotations) { +func (ev *evaluator) rangeEvalAgg(ctx context.Context, aggExpr *parser.AggregateExpr, sortedGrouping []string, inputMatrix Matrix, param float64) (Matrix, annotations.Annotations) { // Keep a copy of the original point slice so that it can be returned to the pool. origMatrix := slices.Clone(inputMatrix) defer func() { @@ -1386,7 +1382,7 @@ func (ev *evaluator) rangeEvalAgg(aggExpr *parser.AggregateExpr, sortedGrouping } for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval { - if err := contextDone(ev.ctx, "expression evaluation"); err != nil { + if err := contextDone(ctx, "expression evaluation"); err != nil { ev.error(err) } // Reset number of samples in memory after each timestamp. @@ -1437,11 +1433,11 @@ func (ev *evaluator) rangeEvalAgg(aggExpr *parser.AggregateExpr, sortedGrouping // evalSubquery evaluates given SubqueryExpr and returns an equivalent // evaluated MatrixSelector in its place. Note that the Name and LabelMatchers are not set. -func (ev *evaluator) evalSubquery(subq *parser.SubqueryExpr) (*parser.MatrixSelector, int, annotations.Annotations) { +func (ev *evaluator) evalSubquery(ctx context.Context, subq *parser.SubqueryExpr) (*parser.MatrixSelector, int, annotations.Annotations) { samplesStats := ev.samplesStats // Avoid double counting samples when running a subquery, those samples will be counted in later stage. ev.samplesStats = ev.samplesStats.NewChild() - val, ws := ev.eval(subq) + val, ws := ev.eval(ctx, subq) // But do incorporate the peak from the subquery samplesStats.UpdatePeakFromSubquery(ev.samplesStats) ev.samplesStats = samplesStats @@ -1468,17 +1464,16 @@ func (ev *evaluator) evalSubquery(subq *parser.SubqueryExpr) (*parser.MatrixSele } // eval evaluates the given expression as the given AST expression node requires. -func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotations) { +func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, annotations.Annotations) { // This is the top-level evaluation method. // Thus, we check for timeout/cancellation here. - if err := contextDone(ev.ctx, "expression evaluation"); err != nil { + if err := contextDone(ctx, "expression evaluation"); err != nil { ev.error(err) } numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 // Create a new span to help investigate inner evaluation performances. - ctxWithSpan, span := otel.Tracer("").Start(ev.ctx, stats.InnerEvalTime.SpanOperation()+" eval "+reflect.TypeOf(expr).String()) - ev.ctx = ctxWithSpan + ctx, span := otel.Tracer("").Start(ctx, stats.InnerEvalTime.SpanOperation()+" eval "+reflect.TypeOf(expr).String()) defer span.End() switch e := expr.(type) { @@ -1500,7 +1495,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio sortedGrouping = append(sortedGrouping, valueLabel.Val) slices.Sort(sortedGrouping) } - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return ev.aggregationCountValues(e, sortedGrouping, valueLabel.Val, v[0].(Vector), enh) }, e.Expr) } @@ -1510,16 +1505,16 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio // param is the number k for topk/bottomk, or q for quantile. var fParam float64 if param != nil { - val, ws := ev.eval(param) + val, ws := ev.eval(ctx, param) warnings.Merge(ws) fParam = val.(Matrix)[0].Floats[0].F } // Now fetch the data to be aggregated. - val, ws := ev.eval(e.Expr) + val, ws := ev.eval(ctx, e.Expr) warnings.Merge(ws) inputMatrix := val.(Matrix) - result, ws := ev.rangeEvalAgg(e, sortedGrouping, inputMatrix, fParam) + result, ws := ev.rangeEvalAgg(ctx, e, sortedGrouping, inputMatrix, fParam) warnings.Merge(ws) ev.currentSamples = originalNumSamples + result.TotalSamples() ev.samplesStats.UpdatePeak(ev.currentSamples) @@ -1537,7 +1532,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio unwrapParenExpr(&arg) vs, ok := arg.(*parser.VectorSelector) if ok { - return ev.rangeEvalTimestampFunctionOverVectorSelector(vs, call, e) + return ev.rangeEvalTimestampFunctionOverVectorSelector(ctx, vs, call, e) } } @@ -1561,7 +1556,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio matrixArgIndex = i matrixArg = true // Replacing parser.SubqueryExpr with parser.MatrixSelector. - val, totalSamples, ws := ev.evalSubquery(subq) + val, totalSamples, ws := ev.evalSubquery(ctx, subq) e.Args[i] = val warnings.Merge(ws) defer func() { @@ -1576,14 +1571,14 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio // Special handling for functions that work on series not samples. switch e.Func.Name { case "label_replace": - return ev.evalLabelReplace(e.Args) + return ev.evalLabelReplace(ctx, e.Args) case "label_join": - return ev.evalLabelJoin(e.Args) + return ev.evalLabelJoin(ctx, e.Args) } if !matrixArg { // Does not have a matrix argument. - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { vec, annos := call(v, e.Args, enh) return vec, warnings.Merge(annos) }, e.Args...) @@ -1595,7 +1590,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio otherInArgs := make([]Vector, len(e.Args)) for i, e := range e.Args { if i != matrixArgIndex { - val, ws := ev.eval(e) + val, ws := ev.eval(ctx, e) otherArgs[i] = val.(Matrix) otherInArgs[i] = Vector{Sample{}} inArgs[i] = otherInArgs[i] @@ -1609,7 +1604,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio sel := arg.(*parser.MatrixSelector) selVS := sel.VectorSelector.(*parser.VectorSelector) - ws, err := checkAndExpandSeriesSet(ev.ctx, sel) + ws, err := checkAndExpandSeriesSet(ctx, sel) warnings.Merge(ws) if err != nil { ev.error(errWithWarnings{fmt.Errorf("expanding series: %w", err), warnings}) @@ -1639,7 +1634,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio dropName := e.Func.Name != "last_over_time" for i, s := range selVS.Series { - if err := contextDone(ev.ctx, "expression evaluation"); err != nil { + if err := contextDone(ctx, "expression evaluation"); err != nil { ev.error(err) } ev.currentSamples -= len(floats) + totalHPointSize(histograms) @@ -1785,10 +1780,10 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio return mat, warnings case *parser.ParenExpr: - return ev.eval(e.Expr) + return ev.eval(ctx, e.Expr) case *parser.UnaryExpr: - val, ws := ev.eval(e.Expr) + val, ws := ev.eval(ctx, e.Expr) mat := val.(Matrix) if e.Op == parser.SUB { for i := range mat { @@ -1809,7 +1804,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio case *parser.BinaryExpr: switch lt, rt := e.LHS.Type(), e.RHS.Type(); { case lt == parser.ValueTypeScalar && rt == parser.ValueTypeScalar: - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { val := scalarBinop(e.Op, v[0].(Vector)[0].F, v[1].(Vector)[0].F) return append(enh.Out, Sample{F: val}), nil }, e.LHS, e.RHS) @@ -1822,39 +1817,39 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio } switch e.Op { case parser.LAND: - return ev.rangeEval(initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return ev.VectorAnd(v[0].(Vector), v[1].(Vector), e.VectorMatching, sh[0], sh[1], enh), nil }, e.LHS, e.RHS) case parser.LOR: - return ev.rangeEval(initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return ev.VectorOr(v[0].(Vector), v[1].(Vector), e.VectorMatching, sh[0], sh[1], enh), nil }, e.LHS, e.RHS) case parser.LUNLESS: - return ev.rangeEval(initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return ev.VectorUnless(v[0].(Vector), v[1].(Vector), e.VectorMatching, sh[0], sh[1], enh), nil }, e.LHS, e.RHS) default: - return ev.rangeEval(initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { vec, err := ev.VectorBinop(e.Op, v[0].(Vector), v[1].(Vector), e.VectorMatching, e.ReturnBool, sh[0], sh[1], enh) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) } case lt == parser.ValueTypeVector && rt == parser.ValueTypeScalar: - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { vec, err := ev.VectorscalarBinop(e.Op, v[0].(Vector), Scalar{V: v[1].(Vector)[0].F}, false, e.ReturnBool, enh) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) case lt == parser.ValueTypeScalar && rt == parser.ValueTypeVector: - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { vec, err := ev.VectorscalarBinop(e.Op, v[1].(Vector), Scalar{V: v[0].(Vector)[0].F}, true, e.ReturnBool, enh) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) } case *parser.NumberLiteral: - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return append(enh.Out, Sample{F: e.Val, Metric: labels.EmptyLabels()}), nil }) @@ -1862,7 +1857,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio return String{V: e.Val, T: ev.startTimestamp}, nil case *parser.VectorSelector: - ws, err := checkAndExpandSeriesSet(ev.ctx, e) + ws, err := checkAndExpandSeriesSet(ctx, e) if err != nil { ev.error(errWithWarnings{fmt.Errorf("expanding series: %w", err), ws}) } @@ -1871,7 +1866,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio it := storage.NewMemoizedEmptyIterator(durationMilliseconds(ev.lookbackDelta)) var chkIter chunkenc.Iterator for i, s := range e.Series { - if err := contextDone(ev.ctx, "expression evaluation"); err != nil { + if err := contextDone(ctx, "expression evaluation"); err != nil { ev.error(err) } chkIter = s.Iterator(chkIter) @@ -1922,14 +1917,13 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio if ev.startTimestamp != ev.endTimestamp { panic(errors.New("cannot do range evaluation of matrix selector")) } - return ev.matrixSelector(e) + return ev.matrixSelector(ctx, e) case *parser.SubqueryExpr: offsetMillis := durationMilliseconds(e.Offset) rangeMillis := durationMilliseconds(e.Range) newEv := &evaluator{ endTimestamp: ev.endTimestamp - offsetMillis, - ctx: ev.ctx, currentSamples: ev.currentSamples, maxSamples: ev.maxSamples, logger: ev.logger, @@ -1959,7 +1953,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio setOffsetForAtModifier(newEv.startTimestamp, e.Expr) } - res, ws := newEv.eval(e.Expr) + res, ws := newEv.eval(ctx, e.Expr) ev.currentSamples = newEv.currentSamples ev.samplesStats.UpdatePeakFromSubquery(newEv.samplesStats) ev.samplesStats.IncrementSamplesAtTimestamp(ev.endTimestamp, newEv.samplesStats.TotalSamples) @@ -1967,14 +1961,13 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio case *parser.StepInvariantExpr: switch ce := e.Expr.(type) { case *parser.StringLiteral, *parser.NumberLiteral: - return ev.eval(ce) + return ev.eval(ctx, ce) } newEv := &evaluator{ startTimestamp: ev.startTimestamp, endTimestamp: ev.startTimestamp, // Always a single evaluation. interval: ev.interval, - ctx: ev.ctx, currentSamples: ev.currentSamples, maxSamples: ev.maxSamples, logger: ev.logger, @@ -1983,7 +1976,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ev.enableDelayedNameRemoval, } - res, ws := newEv.eval(e.Expr) + res, ws := newEv.eval(ctx, e.Expr) ev.currentSamples = newEv.currentSamples ev.samplesStats.UpdatePeakFromSubquery(newEv.samplesStats) for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval { @@ -2059,8 +2052,8 @@ func reuseOrGetFPointSlices(prevSS *Series, numSteps int) (r []FPoint) { return getFPointSlice(numSteps) } -func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(vs *parser.VectorSelector, call FunctionCall, e *parser.Call) (parser.Value, annotations.Annotations) { - ws, err := checkAndExpandSeriesSet(ev.ctx, vs) +func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(ctx context.Context, vs *parser.VectorSelector, call FunctionCall, e *parser.Call) (parser.Value, annotations.Annotations) { + ws, err := checkAndExpandSeriesSet(ctx, vs) if err != nil { ev.error(errWithWarnings{fmt.Errorf("expanding series: %w", err), ws}) } @@ -2071,7 +2064,7 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(vs *parser.Vec seriesIterators[i] = storage.NewMemoizedIterator(it, durationMilliseconds(ev.lookbackDelta)) } - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { if vs.Timestamp != nil { // This is a special case for "timestamp()" when the @ modifier is used, to ensure that // we return a point for each time step in this case. @@ -2207,7 +2200,7 @@ func putMatrixSelectorHPointSlice(p []HPoint) { } // matrixSelector evaluates a *parser.MatrixSelector expression. -func (ev *evaluator) matrixSelector(node *parser.MatrixSelector) (Matrix, annotations.Annotations) { +func (ev *evaluator) matrixSelector(ctx context.Context, node *parser.MatrixSelector) (Matrix, annotations.Annotations) { var ( vs = node.VectorSelector.(*parser.VectorSelector) @@ -2218,7 +2211,7 @@ func (ev *evaluator) matrixSelector(node *parser.MatrixSelector) (Matrix, annota it = storage.NewBuffer(durationMilliseconds(node.Range)) ) - ws, err := checkAndExpandSeriesSet(ev.ctx, node) + ws, err := checkAndExpandSeriesSet(ctx, node) if err != nil { ev.error(errWithWarnings{fmt.Errorf("expanding series: %w", err), ws}) } @@ -2226,7 +2219,7 @@ func (ev *evaluator) matrixSelector(node *parser.MatrixSelector) (Matrix, annota var chkIter chunkenc.Iterator series := vs.Series for i, s := range series { - if err := contextDone(ev.ctx, "expression evaluation"); err != nil { + if err := contextDone(ctx, "expression evaluation"); err != nil { ev.error(err) } chkIter = s.Iterator(chkIter) diff --git a/promql/functions.go b/promql/functions.go index 9fa7fbe190..8141d2a9e6 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -14,6 +14,7 @@ package promql import ( + "context" "errors" "fmt" "math" @@ -1463,7 +1464,7 @@ func funcChanges(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelp } // label_replace function operates only on series; does not look at timestamps or values. -func (ev *evaluator) evalLabelReplace(args parser.Expressions) (parser.Value, annotations.Annotations) { +func (ev *evaluator) evalLabelReplace(ctx context.Context, args parser.Expressions) (parser.Value, annotations.Annotations) { var ( dst = stringFromArg(args[1]) repl = stringFromArg(args[2]) @@ -1479,7 +1480,7 @@ func (ev *evaluator) evalLabelReplace(args parser.Expressions) (parser.Value, an panic(fmt.Errorf("invalid destination label name in label_replace(): %s", dst)) } - val, ws := ev.eval(args[0]) + val, ws := ev.eval(ctx, args[0]) matrix := val.(Matrix) lb := labels.NewBuilder(labels.EmptyLabels()) @@ -1520,7 +1521,7 @@ func funcVector(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelpe } // label_join function operates only on series; does not look at timestamps or values. -func (ev *evaluator) evalLabelJoin(args parser.Expressions) (parser.Value, annotations.Annotations) { +func (ev *evaluator) evalLabelJoin(ctx context.Context, args parser.Expressions) (parser.Value, annotations.Annotations) { var ( dst = stringFromArg(args[1]) sep = stringFromArg(args[2]) @@ -1537,7 +1538,7 @@ func (ev *evaluator) evalLabelJoin(args parser.Expressions) (parser.Value, annot panic(fmt.Errorf("invalid destination label name in label_join(): %s", dst)) } - val, ws := ev.eval(args[0]) + val, ws := ev.eval(ctx, args[0]) matrix := val.(Matrix) srcVals := make([]string, len(srcLabels)) lb := labels.NewBuilder(labels.EmptyLabels()) From 54989ce6fd6f25feb334cb4610e9e65ec700755e Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 2 Sep 2024 21:17:45 +0100 Subject: [PATCH 79/83] PromQL: add short string method To be used in tracing or logging. Signed-off-by: Bryan Boreham --- promql/parser/printer.go | 46 ++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/promql/parser/printer.go b/promql/parser/printer.go index 69f5f082d6..63b1950827 100644 --- a/promql/parser/printer.go +++ b/promql/parser/printer.go @@ -72,6 +72,11 @@ func (node *AggregateExpr) String() string { return aggrString } +func (node *AggregateExpr) ShortString() string { + aggrString := node.getAggOpStr() + return aggrString +} + func (node *AggregateExpr) getAggOpStr() string { aggrString := node.Op.String() @@ -95,14 +100,20 @@ func joinLabels(ss []string) string { return strings.Join(ss, ", ") } -func (node *BinaryExpr) String() string { - returnBool := "" +func (node *BinaryExpr) returnBool() string { if node.ReturnBool { - returnBool = " bool" + return " bool" } + return "" +} +func (node *BinaryExpr) String() string { matching := node.getMatchingStr() - return fmt.Sprintf("%s %s%s%s %s", node.LHS, node.Op, returnBool, matching, node.RHS) + return fmt.Sprintf("%s %s%s%s %s", node.LHS, node.Op, node.returnBool(), matching, node.RHS) +} + +func (node *BinaryExpr) ShortString() string { + return fmt.Sprintf("%s%s%s", node.Op, node.returnBool(), node.getMatchingStr()) } func (node *BinaryExpr) getMatchingStr() string { @@ -130,9 +141,13 @@ func (node *Call) String() string { return fmt.Sprintf("%s(%s)", node.Func.Name, node.Args) } -func (node *MatrixSelector) String() string { +func (node *Call) ShortString() string { + return node.Func.Name +} + +func (node *MatrixSelector) atOffset() (string, string) { // Copy the Vector selector before changing the offset - vecSelector := *node.VectorSelector.(*VectorSelector) + vecSelector := node.VectorSelector.(*VectorSelector) offset := "" switch { case vecSelector.OriginalOffset > time.Duration(0): @@ -149,7 +164,13 @@ func (node *MatrixSelector) String() string { case vecSelector.StartOrEnd == END: at = " @ end()" } + return at, offset +} +func (node *MatrixSelector) String() string { + at, offset := node.atOffset() + // Copy the Vector selector before changing the offset + vecSelector := *node.VectorSelector.(*VectorSelector) // Do not print the @ and offset twice. offsetVal, atVal, preproc := vecSelector.OriginalOffset, vecSelector.Timestamp, vecSelector.StartOrEnd vecSelector.OriginalOffset = 0 @@ -163,10 +184,19 @@ func (node *MatrixSelector) String() string { return str } +func (node *MatrixSelector) ShortString() string { + at, offset := node.atOffset() + return fmt.Sprintf("[%s]%s%s", model.Duration(node.Range), at, offset) +} + func (node *SubqueryExpr) String() string { return fmt.Sprintf("%s%s", node.Expr.String(), node.getSubqueryTimeSuffix()) } +func (node *SubqueryExpr) ShortString() string { + return node.getSubqueryTimeSuffix() +} + // getSubqueryTimeSuffix returns the '[:] @ offset ' suffix of the subquery. func (node *SubqueryExpr) getSubqueryTimeSuffix() string { step := "" @@ -208,6 +238,10 @@ func (node *UnaryExpr) String() string { return fmt.Sprintf("%s%s", node.Op, node.Expr) } +func (node *UnaryExpr) ShortString() string { + return node.Op.String() +} + func (node *VectorSelector) String() string { var labelStrings []string if len(node.LabelMatchers) > 1 { From abb0502685ee7ee745c84fa07f246f158fe8b170 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 2 Sep 2024 13:38:05 +0100 Subject: [PATCH 80/83] [ENHANCEMENT] PromQL: Add detail to tracing spans For aggregates, operators, calls, show what operation is performed. Also add an event when series are expanded, typically time spent accessing TSDB. Signed-off-by: Bryan Boreham --- promql/engine.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/promql/engine.go b/promql/engine.go index 537eafb41e..3094ca3b29 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -991,6 +991,8 @@ func checkAndExpandSeriesSet(ctx context.Context, expr parser.Expr) (annotations if e.Series != nil { return nil, nil } + span := trace.SpanFromContext(ctx) + span.AddEvent("expand start", trace.WithAttributes(attribute.String("selector", e.String()))) series, ws, err := expandSeriesSet(ctx, e.UnexpandedSeriesSet) if e.SkipHistogramBuckets { for i := range series { @@ -998,6 +1000,7 @@ func checkAndExpandSeriesSet(ctx context.Context, expr parser.Expr) (annotations } } e.Series = series + span.AddEvent("expand end", trace.WithAttributes(attribute.Int("num_series", len(series)))) return ws, err } return nil, nil @@ -1475,6 +1478,9 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, // Create a new span to help investigate inner evaluation performances. ctx, span := otel.Tracer("").Start(ctx, stats.InnerEvalTime.SpanOperation()+" eval "+reflect.TypeOf(expr).String()) defer span.End() + if ss, ok := expr.(interface{ ShortString() string }); ok { + span.SetAttributes(attribute.String("operation", ss.ShortString())) + } switch e := expr.(type) { case *parser.AggregateExpr: @@ -1849,11 +1855,13 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, } case *parser.NumberLiteral: + span.SetAttributes(attribute.Float64("value", e.Val)) return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return append(enh.Out, Sample{F: e.Val, Metric: labels.EmptyLabels()}), nil }) case *parser.StringLiteral: + span.SetAttributes(attribute.String("value", e.Val)) return String{V: e.Val, T: ev.startTimestamp}, nil case *parser.VectorSelector: From f2064c7987d34d44c368b48968f845842dfd973d Mon Sep 17 00:00:00 2001 From: Joshua Hesketh Date: Wed, 4 Sep 2024 20:07:16 +1000 Subject: [PATCH 81/83] NH: Do not re-use spans between histograms (#14771) promql, tsdb (histograms): Do not re-use spans between histograms When multiple points exist with the same native histogram schemas they share their spans. This causes a problem when a native histogram (NH) schema is modified (for example, during a Sum) then the other NH's with the same spans are also modified. As such, we should create a new Span for each NH. This will ensure NH's interfaces are safe to use without considering the effect on other histograms. At the moment this doesn't present itself as a problem because in all aggregations and functions operating on native histograms they are copied by the promql query engine first. Signed-off-by: Joshua Hesketh --------- Signed-off-by: Joshua Hesketh --- tsdb/chunkenc/float_histogram.go | 17 ++++- tsdb/chunkenc/float_histogram_test.go | 51 +++++++++++++ tsdb/chunkenc/histogram.go | 21 +++++- tsdb/chunkenc/histogram_test.go | 102 ++++++++++++++++++++++++++ 4 files changed, 189 insertions(+), 2 deletions(-) diff --git a/tsdb/chunkenc/float_histogram.go b/tsdb/chunkenc/float_histogram.go index a5f123bc93..f18eb77dad 100644 --- a/tsdb/chunkenc/float_histogram.go +++ b/tsdb/chunkenc/float_histogram.go @@ -974,6 +974,7 @@ func (it *floatHistogramIterator) Reset(b []byte) { if it.atFloatHistogramCalled { it.atFloatHistogramCalled = false it.pBuckets, it.nBuckets = nil, nil + it.pSpans, it.nSpans = nil, nil } else { it.pBuckets, it.nBuckets = it.pBuckets[:0], it.nBuckets[:0] } @@ -1069,7 +1070,7 @@ func (it *floatHistogramIterator) Next() ValueType { // The case for the 2nd sample with single deltas is implicitly handled correctly with the double delta code, // so we don't need a separate single delta logic for the 2nd sample. - // Recycle bucket slices that have not been returned yet. Otherwise, copy them. + // Recycle bucket and span slices that have not been returned yet. Otherwise, copy them. // We can always recycle the slices for leading and trailing bits as they are // never returned to the caller. if it.atFloatHistogramCalled { @@ -1088,6 +1089,20 @@ func (it *floatHistogramIterator) Next() ValueType { } else { it.nBuckets = nil } + if len(it.pSpans) > 0 { + newSpans := make([]histogram.Span, len(it.pSpans)) + copy(newSpans, it.pSpans) + it.pSpans = newSpans + } else { + it.pSpans = nil + } + if len(it.nSpans) > 0 { + newSpans := make([]histogram.Span, len(it.nSpans)) + copy(newSpans, it.nSpans) + it.nSpans = newSpans + } else { + it.nSpans = nil + } } tDod, err := readVarbitInt(&it.br) diff --git a/tsdb/chunkenc/float_histogram_test.go b/tsdb/chunkenc/float_histogram_test.go index 689696f5ae..6092c0f636 100644 --- a/tsdb/chunkenc/float_histogram_test.go +++ b/tsdb/chunkenc/float_histogram_test.go @@ -1306,3 +1306,54 @@ func TestFloatHistogramAppendOnlyErrors(t *testing.T) { require.EqualError(t, err, "float histogram counter reset") }) } + +func TestFloatHistogramUniqueSpansAfterNext(t *testing.T) { + // Create two histograms with the same schema and spans. + h1 := &histogram.FloatHistogram{ + Schema: 1, + ZeroThreshold: 1e-100, + Count: 10, + ZeroCount: 2, + Sum: 15.0, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []float64{1, 2, 3, 4}, + NegativeSpans: []histogram.Span{ + {Offset: 1, Length: 1}, + }, + NegativeBuckets: []float64{2}, + } + + h2 := h1.Copy() + + // Create a chunk and append both histograms. + c := NewFloatHistogramChunk() + app, err := c.Appender() + require.NoError(t, err) + + _, _, _, err = app.AppendFloatHistogram(nil, 0, h1, false) + require.NoError(t, err) + + _, _, _, err = app.AppendFloatHistogram(nil, 1, h2, false) + require.NoError(t, err) + + // Create an iterator and advance to the first histogram. + it := c.Iterator(nil) + require.Equal(t, ValFloatHistogram, it.Next()) + _, rh1 := it.AtFloatHistogram(nil) + + // Advance to the second histogram and retrieve it. + require.Equal(t, ValFloatHistogram, it.Next()) + _, rh2 := it.AtFloatHistogram(nil) + + require.Equal(t, rh1.PositiveSpans, h1.PositiveSpans, "Returned positive spans are as expected") + require.Equal(t, rh1.NegativeSpans, h1.NegativeSpans, "Returned negative spans are as expected") + require.Equal(t, rh2.PositiveSpans, h1.PositiveSpans, "Returned positive spans are as expected") + require.Equal(t, rh2.NegativeSpans, h1.NegativeSpans, "Returned negative spans are as expected") + + // Check that the spans for h1 and h2 are unique slices. + require.NotSame(t, &rh1.PositiveSpans[0], &rh2.PositiveSpans[0], "PositiveSpans should be unique between histograms") + require.NotSame(t, &rh1.NegativeSpans[0], &rh2.NegativeSpans[0], "NegativeSpans should be unique between histograms") +} diff --git a/tsdb/chunkenc/histogram.go b/tsdb/chunkenc/histogram.go index fafae48d3c..f8796d64ec 100644 --- a/tsdb/chunkenc/histogram.go +++ b/tsdb/chunkenc/histogram.go @@ -1073,6 +1073,7 @@ func (it *histogramIterator) Reset(b []byte) { if it.atHistogramCalled { it.atHistogramCalled = false it.pBuckets, it.nBuckets = nil, nil + it.pSpans, it.nSpans = nil, nil } else { it.pBuckets = it.pBuckets[:0] it.nBuckets = it.nBuckets[:0] @@ -1185,8 +1186,25 @@ func (it *histogramIterator) Next() ValueType { // The case for the 2nd sample with single deltas is implicitly handled correctly with the double delta code, // so we don't need a separate single delta logic for the 2nd sample. - // Recycle bucket slices that have not been returned yet. Otherwise, + // Recycle bucket and span slices that have not been returned yet. Otherwise, copy them. // copy them. + if it.atFloatHistogramCalled || it.atHistogramCalled { + if len(it.pSpans) > 0 { + newSpans := make([]histogram.Span, len(it.pSpans)) + copy(newSpans, it.pSpans) + it.pSpans = newSpans + } else { + it.pSpans = nil + } + if len(it.nSpans) > 0 { + newSpans := make([]histogram.Span, len(it.nSpans)) + copy(newSpans, it.nSpans) + it.nSpans = newSpans + } else { + it.nSpans = nil + } + } + if it.atHistogramCalled { it.atHistogramCalled = false if len(it.pBuckets) > 0 { @@ -1204,6 +1222,7 @@ func (it *histogramIterator) Next() ValueType { it.nBuckets = nil } } + // FloatBuckets are set from scratch, so simply create empty ones. if it.atFloatHistogramCalled { it.atFloatHistogramCalled = false diff --git a/tsdb/chunkenc/histogram_test.go b/tsdb/chunkenc/histogram_test.go index 59187ed17e..29b77b1583 100644 --- a/tsdb/chunkenc/histogram_test.go +++ b/tsdb/chunkenc/histogram_test.go @@ -1487,6 +1487,108 @@ func TestHistogramAppendOnlyErrors(t *testing.T) { }) } +func TestHistogramUniqueSpansAfterNext(t *testing.T) { + // Create two histograms with the same schema and spans. + h1 := &histogram.Histogram{ + Schema: 1, + ZeroThreshold: 1e-100, + Count: 10, + ZeroCount: 2, + Sum: 15.0, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{1, 2, 3, 4}, + NegativeSpans: []histogram.Span{ + {Offset: 1, Length: 1}, + }, + NegativeBuckets: []int64{2}, + } + + h2 := h1.Copy() + + // Create a chunk and append both histograms. + c := NewHistogramChunk() + app, err := c.Appender() + require.NoError(t, err) + + _, _, _, err = app.AppendHistogram(nil, 0, h1, false) + require.NoError(t, err) + + _, _, _, err = app.AppendHistogram(nil, 1, h2, false) + require.NoError(t, err) + + // Create an iterator and advance to the first histogram. + it := c.Iterator(nil) + require.Equal(t, ValHistogram, it.Next()) + _, rh1 := it.AtHistogram(nil) + + // Advance to the second histogram and retrieve it. + require.Equal(t, ValHistogram, it.Next()) + _, rh2 := it.AtHistogram(nil) + + require.Equal(t, rh1.PositiveSpans, h1.PositiveSpans, "Returned positive spans are as expected") + require.Equal(t, rh1.NegativeSpans, h1.NegativeSpans, "Returned negative spans are as expected") + require.Equal(t, rh2.PositiveSpans, h1.PositiveSpans, "Returned positive spans are as expected") + require.Equal(t, rh2.NegativeSpans, h1.NegativeSpans, "Returned negative spans are as expected") + + // Check that the spans for h1 and h2 are unique slices. + require.NotSame(t, &rh1.PositiveSpans[0], &rh2.PositiveSpans[0], "PositiveSpans should be unique between histograms") + require.NotSame(t, &rh1.NegativeSpans[0], &rh2.NegativeSpans[0], "NegativeSpans should be unique between histograms") +} + +func TestHistogramUniqueSpansAfterNextWithAtFloatHistogram(t *testing.T) { + // Create two histograms with the same schema and spans. + h1 := &histogram.Histogram{ + Schema: 1, + ZeroThreshold: 1e-100, + Count: 10, + ZeroCount: 2, + Sum: 15.0, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{1, 2, 3, 4}, + NegativeSpans: []histogram.Span{ + {Offset: 1, Length: 1}, + }, + NegativeBuckets: []int64{2}, + } + + h2 := h1.Copy() + + // Create a chunk and append both histograms. + c := NewHistogramChunk() + app, err := c.Appender() + require.NoError(t, err) + + _, _, _, err = app.AppendHistogram(nil, 0, h1, false) + require.NoError(t, err) + + _, _, _, err = app.AppendHistogram(nil, 1, h2, false) + require.NoError(t, err) + + // Create an iterator and advance to the first histogram. + it := c.Iterator(nil) + require.Equal(t, ValHistogram, it.Next()) + _, rh1 := it.AtFloatHistogram(nil) + + // Advance to the second histogram and retrieve it. + require.Equal(t, ValHistogram, it.Next()) + _, rh2 := it.AtFloatHistogram(nil) + + require.Equal(t, rh1.PositiveSpans, h1.PositiveSpans, "Returned positive spans are as expected") + require.Equal(t, rh1.NegativeSpans, h1.NegativeSpans, "Returned negative spans are as expected") + require.Equal(t, rh2.PositiveSpans, h1.PositiveSpans, "Returned positive spans are as expected") + require.Equal(t, rh2.NegativeSpans, h1.NegativeSpans, "Returned negative spans are as expected") + + // Check that the spans for h1 and h2 are unique slices. + require.NotSame(t, &rh1.PositiveSpans[0], &rh2.PositiveSpans[0], "PositiveSpans should be unique between histograms") + require.NotSame(t, &rh1.NegativeSpans[0], &rh2.NegativeSpans[0], "NegativeSpans should be unique between histograms") +} + func BenchmarkAppendable(b *testing.B) { // Create a histogram with a bunch of spans and buckets. const ( From befcfadf78f37b7fde32e42f61303e31b67712e9 Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Wed, 4 Sep 2024 18:51:09 +0200 Subject: [PATCH 82/83] Fix merge conflicts Fix call to newTestEngine(t) in promql/engine_test.go:3214. `agent` feature-flag it's own cmdline flag now. Remove `scrape.name-escaping-scheme` argument. Signed-off-by: Jan Fajerski --- cmd/prometheus/main.go | 6 +++--- docs/command-line/prometheus.md | 4 ++-- promql/engine_test.go | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 77c7b5f5dd..12e06c7d6c 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -466,11 +466,11 @@ func main() { a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates."). Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval) - a.Flag("scrape.name-escaping-scheme", `Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots".`).Default(scrape.DefaultNameEscapingScheme.String()).StringVar(&cfg.nameEscapingScheme) - - a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). + a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). Default("").StringsVar(&cfg.featureList) + a.Flag("agent", "Run Prometheus in 'Agent mode'.").BoolVar(&agentMode) + promlogflag.AddFlags(a, &cfg.promlogConfig) a.Flag("write-documentation", "Generate command line documentation. Internal use.").Hidden().Action(func(ctx *kingpin.ParseContext) error { diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index 35de1ecdb8..d637312a30 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -55,8 +55,8 @@ The Prometheus monitoring server | --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` | | --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` | | --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` | -| --scrape.name-escaping-scheme | Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots". | `values` | -| --enable-feature ... | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +| --enable-feature ... | Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +| --agent | Run Prometheus in 'Agent mode'. | | | --log.level | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` | | --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` | diff --git a/promql/engine_test.go b/promql/engine_test.go index 4600acee5b..4015d08748 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -3211,7 +3211,7 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { seriesName := "sparse_histogram_series" seriesNameOverTime := "sparse_histogram_series_over_time" - engine := newTestEngine() + engine := newTestEngine(t) ts := idx0 * int64(10*time.Minute/time.Millisecond) app := storage.Appender(context.Background()) From ca1f30bb6e8c025c8f82ae7ad39060aa66975536 Mon Sep 17 00:00:00 2001 From: Arthur Silva Sens Date: Thu, 29 Aug 2024 14:45:09 -0300 Subject: [PATCH 83/83] Promote Agent mode to it's own cmdline flag Signed-off-by: Arthur Silva Sens --- cmd/prometheus/main.go | 3 --- cmd/prometheus/main_test.go | 8 ++++---- docs/command-line/prometheus.md | 4 ++++ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 12e06c7d6c..c0bd136fa8 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -200,9 +200,6 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error { case "new-service-discovery-manager": c.enableNewSDManager = true level.Info(logger).Log("msg", "Experimental service discovery manager") - case "agent": - agentMode = true - level.Info(logger).Log("msg", "Experimental agent mode enabled.") case "promql-per-step-stats": c.enablePerStepStats = true level.Info(logger).Log("msg", "Experimental per-step statistics reporting") diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index c827812e60..bc16f26d73 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -348,7 +348,7 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames } func TestAgentSuccessfulStartup(t *testing.T) { - prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig) + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig) require.NoError(t, prom.Start()) actualExitStatus := 0 @@ -366,7 +366,7 @@ func TestAgentSuccessfulStartup(t *testing.T) { } func TestAgentFailedStartupWithServerFlag(t *testing.T) { - prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) + prom := exec.Command(promPath, "-test.main", "--agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) output := bytes.Buffer{} prom.Stderr = &output @@ -393,7 +393,7 @@ func TestAgentFailedStartupWithServerFlag(t *testing.T) { } func TestAgentFailedStartupWithInvalidConfig(t *testing.T) { - prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) require.NoError(t, prom.Start()) actualExitStatus := 0 @@ -431,7 +431,7 @@ func TestModeSpecificFlags(t *testing.T) { args := []string{"-test.main", tc.arg, t.TempDir(), "--web.listen-address=0.0.0.0:0"} if tc.mode == "agent" { - args = append(args, "--enable-feature=agent", "--config.file="+agentConfig) + args = append(args, "--agent", "--config.file="+agentConfig) } else { args = append(args, "--config.file="+promConfig) } diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index d637312a30..4e5c444076 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -55,7 +55,11 @@ The Prometheus monitoring server | --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` | | --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` | | --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` | +<<<<<<< HEAD | --enable-feature ... | Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +======= +| --enable-feature | Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +>>>>>>> 60b9e9fc1 (Promote Agent mode to it's own cmdline flag) | --agent | Run Prometheus in 'Agent mode'. | | | --log.level | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` | | --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` |