From 34072ff9c16faf2cb7e9123f955a8b57cf3dfeff Mon Sep 17 00:00:00 2001
From: Mickael Carl
Date: Sat, 6 May 2023 07:20:58 +0100
Subject: [PATCH 001/112] makefile: let golangci-lint also run on arm64

Signed-off-by: Mickael Carl
---
 Makefile.common | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile.common b/Makefile.common
index bc2a07d72..8f3630921 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -62,10 +62,10 @@ SKIP_GOLANGCI_LINT :=
 GOLANGCI_LINT :=
 GOLANGCI_LINT_OPTS ?=
 GOLANGCI_LINT_VERSION ?= v1.55.2
-# golangci-lint only supports linux, darwin and windows platforms on i386/amd64.
+# golangci-lint only supports linux, darwin and windows platforms on i386/amd64 and arm64.
 # windows isn't included here because of the path separator being different.
 ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
-	ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386))
+	ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386 arm64))
 		# If we're in CI and there is an Actions file, that means the linter
 		# is being run in Actions, so we don't need to run it here.
 		ifneq (,$(SKIP_GOLANGCI_LINT))

From 420b324c3e295ba1d26b81d45f2cfa9d25f75b58 Mon Sep 17 00:00:00 2001
From: Mickael Carl
Date: Sat, 6 May 2023 08:21:15 +0100
Subject: [PATCH 002/112] docs: add a quick note on linting in contributing
 guidelines

Signed-off-by: Mickael Carl
---
 CONTRIBUTING.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 57055ef38..f666caae9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -42,7 +42,12 @@ go build ./cmd/prometheus/
 make test         # Make sure all the tests pass before you commit and push :)
 ```
 
-We use [`golangci-lint`](https://github.com/golangci/golangci-lint) for linting the code. If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. Use this sparingly though, fixing the code to comply with the linter's recommendation is in general the preferred course of action.
+To run a collection of Go linters through [`golangci-lint`](https://github.com/golangci/golangci-lint), do:
+```bash
+make lint
+```
+
+If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. Use this sparingly though, fixing the code to comply with the linter's recommendation is in general the preferred course of action. See [this section of the golangci-lint documentation](https://golangci-lint.run/usage/false-positives/#nolint-directive) for more information.
 
 All our issues are regularly tagged so that you can also filter down the issues involving the components you want to work on. For our labeling policy refer [the wiki page](https://github.com/prometheus/prometheus/wiki/Label-Names-and-Descriptions).

From 7e8f03663ae91ee99bd231518733d7cf8498ca6e Mon Sep 17 00:00:00 2001
From: "roger.wang"
Date: Thu, 29 Feb 2024 09:33:17 +0800
Subject: [PATCH 003/112] Code optimization: The relabel operation is used very
 frequently, and the better-performing strconv.FormatUint() should be used.
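For reference, a minimal micro-benchmark sketch of the difference
(illustrative only, not part of this change; the file and benchmark
names are hypothetical):

	package relabel_test

	import (
		"fmt"
		"strconv"
		"testing"
	)

	// fmt.Sprintf routes the value through fmt's generic, interface-based
	// formatting machinery; strconv.FormatUint converts the integer directly.
	func BenchmarkHashmodSprintf(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = fmt.Sprintf("%d", uint64(i)%1000)
		}
	}

	func BenchmarkHashmodFormatUint(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = strconv.FormatUint(uint64(i)%1000, 10)
		}
	}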
Signed-off-by: roger.wang
---
 model/relabel/relabel.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go
index d29c3d07a..fee39d0b1 100644
--- a/model/relabel/relabel.go
+++ b/model/relabel/relabel.go
@@ -17,6 +17,7 @@ import (
 	"crypto/md5"
 	"encoding/binary"
 	"fmt"
+	"strconv"
 	"strings"
 
 	"github.com/grafana/regexp"
@@ -290,7 +291,7 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) {
 		hash := md5.Sum([]byte(val))
 		// Use only the last 8 bytes of the hash to give the same result as earlier versions of this code.
 		mod := binary.BigEndian.Uint64(hash[8:]) % cfg.Modulus
-		lb.Set(cfg.TargetLabel, fmt.Sprintf("%d", mod))
+		lb.Set(cfg.TargetLabel, strconv.FormatUint(mod, 10))
 	case LabelMap:
 		lb.Range(func(l labels.Label) {
 			if cfg.Regex.MatchString(l.Name) {

From 2aab70b8390348b2814b7b9c7645f81dcf98a53b Mon Sep 17 00:00:00 2001
From: Federico Leva
Date: Wed, 13 Mar 2024 11:28:02 +0200
Subject: [PATCH 004/112] Clarify batch_send_deadline docs

This is the time period covered by a batch of samples, when the number
of waiting samples is lower than max_samples_per_send. It does not
affect timeouts or retries.

Co-authored-by: Bartlomiej Plotka
Signed-off-by: Federico Leva
---
 docs/configuration/configuration.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md
index d751a4084..7d4fbc959 100644
--- a/docs/configuration/configuration.md
+++ b/docs/configuration/configuration.md
@@ -3661,7 +3661,8 @@ queue_config:
   [ min_shards: <int> | default = 1 ]
   # Maximum number of samples per send.
   [ max_samples_per_send: <int> | default = 2000]
-  # Maximum time a sample will wait in buffer.
+  # Maximum time a sample will wait for a send. The sample might wait less
+  # if the buffer is full. Further time might pass due to potential retries.
   [ batch_send_deadline: <duration> | default = 5s ]
   # Initial retry delay. Gets doubled for every retry.
   [ min_backoff: <duration> | default = 30ms ]

From c5a1cc914856670e65d6537f9f76eefdc111312a Mon Sep 17 00:00:00 2001
From: machine424
Date: Wed, 29 Nov 2023 17:49:01 +0100
Subject: [PATCH 005/112] chore(tsdb): add a sandboxDir to DBReadOnly, the
 directory can be used for transient file writes.

use it in loadDataAsQueryable to make sure the RO Head doesn't truncate
or cut new chunks in data/chunks_head/.

add a -sandbox-dir-root flag to "promtool tsdb dump/dump-openmetrics" to
control the root of that sandbox directory.
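For example, an illustrative invocation of the new flag (the paths are
hypothetical):

	# Dump from ./data, but let any transient chunks written during WAL
	# replay land under /tmp instead of inside the read-only db directory.
	promtool tsdb dump --sandbox-dir-root=/tmp ./data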
Signed-off-by: machine424
---
 cmd/promtool/backfill.go      |  2 +-
 cmd/promtool/main.go          |  6 ++-
 cmd/promtool/tsdb.go          |  8 ++--
 cmd/promtool/tsdb_test.go     |  1 +
 docs/command-line/promtool.md |  2 +
 tsdb/chunks/head_chunks.go    | 27 +++++++++++
 tsdb/compact_test.go          |  2 +-
 tsdb/db.go                    | 45 +++++++++++++-----
 tsdb/db_test.go               | 89 +++++++++++++++++++++++++++++++++--
 9 files changed, 158 insertions(+), 24 deletions(-)

diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go
index 601c3ced9..79db428c7 100644
--- a/cmd/promtool/backfill.go
+++ b/cmd/promtool/backfill.go
@@ -88,7 +88,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
 	blockDuration := getCompatibleBlockDuration(maxBlockDuration)
 	mint = blockDuration * (mint / blockDuration)
 
-	db, err := tsdb.OpenDBReadOnly(outputDir, nil)
+	db, err := tsdb.OpenDBReadOnly(outputDir, "", nil)
 	if err != nil {
 		return err
 	}

diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go
index a62ae4fbf..e2a7a37f5 100644
--- a/cmd/promtool/main.go
+++ b/cmd/promtool/main.go
@@ -235,12 +235,14 @@ func main() {
 
 	tsdbDumpCmd := tsdbCmd.Command("dump", "Dump samples from a TSDB.")
 	dumpPath := tsdbDumpCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
+	dumpSandboxDirRoot := tsdbDumpCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String()
 	dumpMinTime := tsdbDumpCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
 	dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
 	dumpMatch := tsdbDumpCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings()
 
 	tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics format. Native histograms are not dumped.")
 	dumpOpenMetricsPath := tsdbDumpOpenMetricsCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
+	dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String()
 	dumpOpenMetricsMinTime := tsdbDumpOpenMetricsCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
 	dumpOpenMetricsMaxTime := tsdbDumpOpenMetricsCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
 	dumpOpenMetricsMatch := tsdbDumpOpenMetricsCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings()
@@ -396,9 +398,9 @@ func main() {
 		os.Exit(checkErr(listBlocks(*listPath, *listHumanReadable)))
 
 	case tsdbDumpCmd.FullCommand():
-		os.Exit(checkErr(dumpSamples(ctx, *dumpPath, *dumpMinTime, *dumpMaxTime, *dumpMatch, formatSeriesSet)))
+		os.Exit(checkErr(dumpSamples(ctx, *dumpPath, *dumpSandboxDirRoot, *dumpMinTime, *dumpMaxTime, *dumpMatch, formatSeriesSet)))
 	case tsdbDumpOpenMetricsCmd.FullCommand():
-		os.Exit(checkErr(dumpSamples(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics)))
+		os.Exit(checkErr(dumpSamples(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsSandboxDirRoot, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics)))
 	// TODO(aSquare14): Work on adding support for custom block size.
 	case openMetricsImportCmd.FullCommand():
 		os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration))

diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go
index b786c9297..4de3b3e06 100644
--- a/cmd/promtool/tsdb.go
+++ b/cmd/promtool/tsdb.go
@@ -338,7 +338,7 @@ func readPrometheusLabels(r io.Reader, n int) ([]labels.Labels, error) {
 }
 
 func listBlocks(path string, humanReadable bool) error {
-	db, err := tsdb.OpenDBReadOnly(path, nil)
+	db, err := tsdb.OpenDBReadOnly(path, "", nil)
 	if err != nil {
 		return err
 	}
@@ -393,7 +393,7 @@ func getFormatedBytes(bytes int64, humanReadable bool) string {
 }
 
 func openBlock(path, blockID string) (*tsdb.DBReadOnly, tsdb.BlockReader, error) {
-	db, err := tsdb.OpenDBReadOnly(path, nil)
+	db, err := tsdb.OpenDBReadOnly(path, "", nil)
 	if err != nil {
 		return nil, nil, err
 	}
@@ -708,8 +708,8 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
 
 type SeriesSetFormatter func(series storage.SeriesSet) error
 
-func dumpSamples(ctx context.Context, path string, mint, maxt int64, match []string, formatter SeriesSetFormatter) (err error) {
-	db, err := tsdb.OpenDBReadOnly(path, nil)
+func dumpSamples(ctx context.Context, dbDir, sandboxDirRoot string, mint, maxt int64, match []string, formatter SeriesSetFormatter) (err error) {
+	db, err := tsdb.OpenDBReadOnly(dbDir, sandboxDirRoot, nil)
 	if err != nil {
 		return err
 	}

diff --git a/cmd/promtool/tsdb_test.go b/cmd/promtool/tsdb_test.go
index 36a65d73e..d3c4e8e05 100644
--- a/cmd/promtool/tsdb_test.go
+++ b/cmd/promtool/tsdb_test.go
@@ -64,6 +64,7 @@ func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []strin
 	err := dumpSamples(
 		context.Background(),
 		path,
+		t.TempDir(),
 		mint,
 		maxt,
 		match,

diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md
index 3eceed48f..9ed51fb7c 100644
--- a/docs/command-line/promtool.md
+++ b/docs/command-line/promtool.md
@@ -566,6 +566,7 @@ Dump samples from a TSDB.
 | Flag | Description | Default |
 | --- | --- | --- |
+| --sandbox-dir-root | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` |
 | --min-time | Minimum timestamp to dump. | `-9223372036854775808` |
 | --max-time | Maximum timestamp to dump. | `9223372036854775807` |
 | --match | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |
@@ -592,6 +593,7 @@ Dump samples from a TSDB.
 
 | Flag | Description | Default |
 | --- | --- | --- |
+| --sandbox-dir-root | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` |
 | --min-time | Minimum timestamp to dump. | `-9223372036854775808` |
 | --max-time | Maximum timestamp to dump. | `9223372036854775807` |
 | --match | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |

diff --git a/tsdb/chunks/head_chunks.go b/tsdb/chunks/head_chunks.go
index 087f25fbb..ec5a81263 100644
--- a/tsdb/chunks/head_chunks.go
+++ b/tsdb/chunks/head_chunks.go
@@ -381,6 +381,33 @@ func listChunkFiles(dir string) (map[int]string, error) {
 	return res, nil
 }
 
+// HardLinkChunkFiles creates hardlinks for chunk files from src to dst.
+// It does nothing if src doesn't exist and ensures dst is created if not.
+func HardLinkChunkFiles(src, dst string) error {
+	_, err := os.Stat(src)
+	if os.IsNotExist(err) {
+		return nil
+	}
+	if err != nil {
+		return fmt.Errorf("check source chunks dir: %w", err)
+	}
+	if err := os.MkdirAll(dst, 0o777); err != nil {
+		return fmt.Errorf("set up destination chunks dir: %w", err)
+	}
+	files, err := listChunkFiles(src)
+	if err != nil {
+		return fmt.Errorf("list chunks: %w", err)
+	}
+	for _, filePath := range files {
+		_, fileName := filepath.Split(filePath)
+		err := os.Link(filepath.Join(src, fileName), filepath.Join(dst, fileName))
+		if err != nil {
+			return fmt.Errorf("hardlink a chunk: %w", err)
+		}
+	}
+	return nil
+}
+
 // repairLastChunkFile deletes the last file if it's empty.
 // Because we don't fsync when creating these files, we could end
 // up with an empty file at the end during an abrupt shutdown.

diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go
index b2d2ea6e7..f2e9024e0 100644
--- a/tsdb/compact_test.go
+++ b/tsdb/compact_test.go
@@ -1297,7 +1297,7 @@ func TestCancelCompactions(t *testing.T) {
 		// This checks that the `context.Canceled` error is properly checked at all levels:
 		// - tsdb_errors.NewMulti() should have the Is() method implemented for correct checks.
 		// - callers should check with errors.Is() instead of ==.
-		readOnlyDB, err := OpenDBReadOnly(tmpdirCopy, log.NewNopLogger())
+		readOnlyDB, err := OpenDBReadOnly(tmpdirCopy, "", log.NewNopLogger())
 		require.NoError(t, err)
 		blocks, err := readOnlyDB.Blocks()
 		require.NoError(t, err)

diff --git a/tsdb/db.go b/tsdb/db.go
index 22292ab16..51e03a014 100644
--- a/tsdb/db.go
+++ b/tsdb/db.go
@@ -383,26 +383,36 @@ var ErrClosed = errors.New("db already closed")
 // Current implementation doesn't support concurrency so
 // all API calls should happen in the same go routine.
 type DBReadOnly struct {
-	logger  log.Logger
-	dir     string
-	closers []io.Closer
-	closed  chan struct{}
+	logger     log.Logger
+	dir        string
+	sandboxDir string
+	closers    []io.Closer
+	closed     chan struct{}
 }
 
 // OpenDBReadOnly opens DB in the given directory for read only operations.
-func OpenDBReadOnly(dir string, l log.Logger) (*DBReadOnly, error) {
+func OpenDBReadOnly(dir, sandboxDirRoot string, l log.Logger) (*DBReadOnly, error) {
 	if _, err := os.Stat(dir); err != nil {
 		return nil, fmt.Errorf("opening the db dir: %w", err)
 	}
 
+	if sandboxDirRoot == "" {
+		sandboxDirRoot = dir
+	}
+	sandboxDir, err := os.MkdirTemp(sandboxDirRoot, "tmp_dbro_sandbox")
+	if err != nil {
+		return nil, fmt.Errorf("setting up sandbox dir: %w", err)
+	}
+
 	if l == nil {
 		l = log.NewNopLogger()
 	}
 
 	return &DBReadOnly{
-		logger: l,
-		dir:    dir,
-		closed: make(chan struct{}),
+		logger:     l,
+		dir:        dir,
+		sandboxDir: sandboxDir,
+		closed:     make(chan struct{}),
 	}, nil
 }
 
@@ -491,7 +501,14 @@ func (db *DBReadOnly) loadDataAsQueryable(maxt int64) (storage.SampleAndChunkQue
 	}
 
 	opts := DefaultHeadOptions()
-	opts.ChunkDirRoot = db.dir
+	// Hard link the chunk files to a dir in db.sandboxDir in case the Head needs to truncate some of them
+	// or cut new ones while replaying the WAL.
+	// See https://github.com/prometheus/prometheus/issues/11618.
+	err = chunks.HardLinkChunkFiles(mmappedChunksDir(db.dir), mmappedChunksDir(db.sandboxDir))
+	if err != nil {
+		return nil, err
+	}
+	opts.ChunkDirRoot = db.sandboxDir
 	head, err := NewHead(nil, db.logger, nil, nil, opts, NewHeadStats())
 	if err != nil {
 		return nil, err
@@ -519,7 +536,7 @@ func (db *DBReadOnly) loadDataAsQueryable(maxt int64) (storage.SampleAndChunkQue
 		}
 	}
 	opts := DefaultHeadOptions()
-	opts.ChunkDirRoot = db.dir
+	opts.ChunkDirRoot = db.sandboxDir
 	head, err = NewHead(nil, db.logger, w, wbl, opts, NewHeadStats())
 	if err != nil {
 		return nil, err
@@ -690,8 +707,14 @@ func (db *DBReadOnly) Block(blockID string) (BlockReader, error) {
 	return block, nil
 }
 
-// Close all block readers.
+// Close all block readers and delete the sandbox dir.
 func (db *DBReadOnly) Close() error {
+	defer func() {
+		// Delete the temporary sandbox directory that was created when opening the DB.
+		if err := os.RemoveAll(db.sandboxDir); err != nil {
+			level.Error(db.logger).Log("msg", "delete sandbox dir", "err", err)
+		}
+	}()
 	select {
 	case <-db.closed:
 		return ErrClosed

diff --git a/tsdb/db_test.go b/tsdb/db_test.go
index 71b2f05ac..ca7b005ed 100644
--- a/tsdb/db_test.go
+++ b/tsdb/db_test.go
@@ -25,6 +25,7 @@ import (
 	"os"
 	"path"
 	"path/filepath"
+	"runtime"
 	"sort"
 	"strconv"
 	"sync"
@@ -2494,7 +2495,7 @@ func TestDBReadOnly(t *testing.T) {
 	}
 
 	// Open a read only db and ensure that the API returns the same result as the normal DB.
-	dbReadOnly, err := OpenDBReadOnly(dbDir, logger)
+	dbReadOnly, err := OpenDBReadOnly(dbDir, "", logger)
 	require.NoError(t, err)
 	defer func() { require.NoError(t, dbReadOnly.Close()) }()
 
@@ -2548,10 +2549,14 @@ func TestDBReadOnly(t *testing.T) {
 // TestDBReadOnlyClosing ensures that after closing the db
 // all api methods return an ErrClosed.
 func TestDBReadOnlyClosing(t *testing.T) {
-	dbDir := t.TempDir()
-	db, err := OpenDBReadOnly(dbDir, log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)))
+	sandboxDir := t.TempDir()
+	db, err := OpenDBReadOnly(t.TempDir(), sandboxDir, log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)))
 	require.NoError(t, err)
+	// The sandboxDir was there.
+	require.DirExists(t, db.sandboxDir)
 	require.NoError(t, db.Close())
+	// The sandboxDir was deleted when closing.
+	require.NoDirExists(t, db.sandboxDir)
 	require.Equal(t, db.Close(), ErrClosed)
 	_, err = db.Blocks()
 	require.Equal(t, err, ErrClosed)
@@ -2587,7 +2592,7 @@ func TestDBReadOnly_FlushWAL(t *testing.T) {
 	}
 
 	// Flush WAL.
-	db, err := OpenDBReadOnly(dbDir, logger)
+	db, err := OpenDBReadOnly(dbDir, "", logger)
 	require.NoError(t, err)
 
 	flush := t.TempDir()
@@ -2595,7 +2600,7 @@ func TestDBReadOnly_FlushWAL(t *testing.T) {
 	require.NoError(t, db.Close())
 
 	// Reopen the DB from the flushed WAL block.
-	db, err = OpenDBReadOnly(flush, logger)
+	db, err = OpenDBReadOnly(flush, "", logger)
 	require.NoError(t, err)
 	defer func() { require.NoError(t, db.Close()) }()
 	blocks, err := db.Blocks()
@@ -2624,6 +2629,80 @@ func TestDBReadOnly_FlushWAL(t *testing.T) {
 	require.Equal(t, 1000.0, sum)
 }
 
+func TestDBReadOnly_Querier_NoAlteration(t *testing.T) {
+	countChunks := func(dir string) int {
+		files, err := os.ReadDir(mmappedChunksDir(dir))
+		require.NoError(t, err)
+		return len(files)
+	}
+
+	dirHash := func(dir string) (hash []byte) {
+		// Windows requires the DB to be closed: "xxx\lock: The process cannot access the file because it is being used by another process."
+		// But closing the DB alters the directory in this case (it'll cut a new chunk).
+		if runtime.GOOS != "windows" {
+			hash = testutil.DirHash(t, dir)
+		}
+		return
+	}
+
+	spinUpQuerierAndCheck := func(dir, sandboxDir string, chunksCount int) {
+		dBDirHash := dirHash(dir)
+		// Bootstrap a RO db from the same dir and set up a querier.
+		dbReadOnly, err := OpenDBReadOnly(dir, sandboxDir, nil)
+		require.NoError(t, err)
+		require.Equal(t, chunksCount, countChunks(dir))
+		q, err := dbReadOnly.Querier(math.MinInt, math.MaxInt)
+		require.NoError(t, err)
+		require.NoError(t, q.Close())
+		require.NoError(t, dbReadOnly.Close())
+		// The RO Head doesn't alter RW db chunks_head/.
+		require.Equal(t, chunksCount, countChunks(dir))
+		require.Equal(t, dirHash(dir), dBDirHash)
+	}
+
+	t.Run("doesn't cut chunks while replaying WAL", func(t *testing.T) {
+		db := openTestDB(t, nil, nil)
+		defer func() {
+			require.NoError(t, db.Close())
+		}()
+
+		// Append until the first mmapped head chunk.
+		for i := 0; i < 121; i++ {
+			app := db.Appender(context.Background())
+			_, err := app.Append(0, labels.FromStrings("foo", "bar"), int64(i), 0)
+			require.NoError(t, err)
+			require.NoError(t, app.Commit())
+		}
+
+		spinUpQuerierAndCheck(db.dir, t.TempDir(), 0)
+
+		// The RW Head should have no problem cutting its own chunk,
+		// this also proves that a chunk needed to be cut.
+		require.NotPanics(t, func() { db.ForceHeadMMap() })
+		require.Equal(t, 1, countChunks(db.dir))
+	})
+
+	t.Run("doesn't truncate corrupted chunks", func(t *testing.T) {
+		db := openTestDB(t, nil, nil)
+		require.NoError(t, db.Close())
+
+		// Simulate a corrupted chunk: without a header.
+		_, err := os.Create(path.Join(mmappedChunksDir(db.dir), "000001"))
+		require.NoError(t, err)
+
+		spinUpQuerierAndCheck(db.dir, t.TempDir(), 1)
+
+		// The RW Head should have no problem truncating its corrupted file:
+		// this proves that the chunk needed to be truncated.
+		db, err = Open(db.dir, nil, nil, nil, nil)
+		defer func() {
+			require.NoError(t, db.Close())
+		}()
+		require.NoError(t, err)
+		require.Equal(t, 0, countChunks(db.dir))
+	})
+}
+
 func TestDBCannotSeePartialCommits(t *testing.T) {
 	if defaultIsolationDisabled {
 		t.Skip("skipping test since tsdb isolation is disabled")

From 5ab24a06d0870a45364180dbe4d946ef33bed9c4 Mon Sep 17 00:00:00 2001
From: komisan19 <18901496+komisan19@users.noreply.github.com>
Date: Sun, 21 Apr 2024 23:31:50 +0900
Subject: [PATCH 006/112] refactor: add max func to maxTimestamp

Signed-off-by: komisan19 <18901496+komisan19@users.noreply.github.com>
---
 .../remote/otlptranslator/prometheusremotewrite/helper.go | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go
index 817cbaba7..85019e272 100644
--- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go
@@ -442,10 +442,8 @@ func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp {
 }
 
 func maxTimestamp(a, b pcommon.Timestamp) pcommon.Timestamp {
-	if a > b {
-		return a
-	}
-	return b
+	resultTimestamp := max(a, b)
+	return resultTimestamp
 }
 
 // addSingleSummaryDataPoint converts pt to len(QuantileValues) + 2 samples.

From 3d84d4d6dcfb610bf0866718f13e8a3b689bcca0 Mon Sep 17 00:00:00 2001
From: komisan19 <18901496+komisan19@users.noreply.github.com>
Date: Mon, 22 Apr 2024 19:03:19 +0900
Subject: [PATCH 007/112] fix

Signed-off-by: komisan19 <18901496+komisan19@users.noreply.github.com>
---
 storage/remote/otlptranslator/prometheusremotewrite/helper.go | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go
index 85019e272..da4ca07d8 100644
--- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go
@@ -442,8 +442,7 @@ func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp {
 }
 
 func maxTimestamp(a, b pcommon.Timestamp) pcommon.Timestamp {
-	resultTimestamp := max(a, b)
-	return resultTimestamp
+	return max(a, b)
 }
 
 // addSingleSummaryDataPoint converts pt to len(QuantileValues) + 2 samples.

From e7219e3d366bc912381577e7eb5ec256fc13f275 Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Tue, 23 Apr 2024 09:54:21 +0100
Subject: [PATCH 008/112] Rule Manager: Add
 `rule_group_last_restore_duration_seconds` to measure restore time per rule
 group

When a rule group changes or prometheus is restarted we need to ensure
we restore the active alerts that were firing for a corresponding rule,
for that Prometheus uses the `ALERTS_FOR_STATE` series to query the
previous state and restore it.

If a given rule has high cardinality (think 100s of 1000s of series)
this process can take a bit of time - this is the first of a series of
PRs to improve this problem and I'd like to start with exposing the
time it takes to restore a rule group as a gauge.
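Once exposed, a slow restore can be spotted with a PromQL query such as
the following (the one-second threshold is illustrative):

	topk(5, prometheus_rule_group_last_restore_duration_seconds > 1)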
Signed-off-by: gotjosh
---
 rules/group.go | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/rules/group.go b/rules/group.go
index c268d2df7..aafab5544 100644
--- a/rules/group.go
+++ b/rules/group.go
@@ -230,7 +230,9 @@ func (g *Group) run(ctx context.Context) {
 		g.evalIterationFunc(ctx, g, evalTimestamp)
 	}
 
-	g.RestoreForState(time.Now())
+	now := time.Now()
+	g.RestoreForState(now)
+	g.metrics.GroupLastRestoreDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(time.Since(now).Seconds())
 	g.shouldRestore = false
 }
 
@@ -779,17 +781,18 @@ const namespace = "prometheus"
 
 // Metrics for rule evaluation.
 type Metrics struct {
-	EvalDuration        prometheus.Summary
-	IterationDuration   prometheus.Summary
-	IterationsMissed    *prometheus.CounterVec
-	IterationsScheduled *prometheus.CounterVec
-	EvalTotal           *prometheus.CounterVec
-	EvalFailures        *prometheus.CounterVec
-	GroupInterval       *prometheus.GaugeVec
-	GroupLastEvalTime   *prometheus.GaugeVec
-	GroupLastDuration   *prometheus.GaugeVec
-	GroupRules          *prometheus.GaugeVec
-	GroupSamples        *prometheus.GaugeVec
+	EvalDuration             prometheus.Summary
+	IterationDuration        prometheus.Summary
+	IterationsMissed         *prometheus.CounterVec
+	IterationsScheduled      *prometheus.CounterVec
+	EvalTotal                *prometheus.CounterVec
+	EvalFailures             *prometheus.CounterVec
+	GroupInterval            *prometheus.GaugeVec
+	GroupLastEvalTime        *prometheus.GaugeVec
+	GroupLastDuration        *prometheus.GaugeVec
+	GroupLastRestoreDuration *prometheus.GaugeVec
+	GroupRules               *prometheus.GaugeVec
+	GroupSamples             *prometheus.GaugeVec
 }
 
 // NewGroupMetrics creates a new instance of Metrics and registers it with the provided registerer,
@@ -865,6 +868,14 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
 			},
 			[]string{"rule_group"},
 		),
+		GroupLastRestoreDuration: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: namespace,
+				Name:      "rule_group_last_restore_duration_seconds",
+				Help:      "The duration of the last rule group restoration.",
+			},
+			[]string{"rule_group"},
+		),
 		GroupRules: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
 				Namespace: namespace,
@@ -894,6 +905,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
 			m.GroupInterval,
 			m.GroupLastEvalTime,
 			m.GroupLastDuration,
+			m.GroupLastRestoreDuration,
 			m.GroupRules,
 			m.GroupSamples,
 		)

From 5e638b7f44ee5dfc81b3eea9b0c2189cc504a475 Mon Sep 17 00:00:00 2001
From: tesla59
Date: Wed, 24 Apr 2024 02:56:15 +0530
Subject: [PATCH 009/112] docs: storage.md: clarify storage.tsdb.retention.time
 description

Signed-off-by: tesla59
---
 docs/storage.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/storage.md b/docs/storage.md
index b4c5b6ada..aae16a170 100644
--- a/docs/storage.md
+++ b/docs/storage.md
@@ -84,8 +84,10 @@ or 31 days, whichever is smaller.
 Prometheus has several flags that configure local storage. The most important are:
 
 - `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`.
-- `--storage.tsdb.retention.time`: When to remove old data. Defaults to `15d`.
-  Overrides `storage.tsdb.retention` if this flag is set to anything other than default.
+- `--storage.tsdb.retention.time`: How long to retain samples in storage. When this flag is
+  set it overrides `storage.tsdb.retention`. If neither this flag nor `storage.tsdb.retention`
+  nor `storage.tsdb.retention.size` is set, the retention time defaults to `15d`.
+  Units Supported: y, w, d, h, m, s, ms.
 - `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain.
   The oldest data will be removed first. Defaults to `0` or disabled. Units supported:
   B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Only

From 381a77ac1e1ef5e616ad45630dc6700a1916ba1d Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Wed, 24 Apr 2024 14:21:11 +0100
Subject: [PATCH 010/112] Change variable name to `restoreStartTime` from
 `now` and introduce a log line to record total time

Signed-off-by: gotjosh
---
 rules/group.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/rules/group.go b/rules/group.go
index aafab5544..27be4b1f4 100644
--- a/rules/group.go
+++ b/rules/group.go
@@ -230,9 +230,11 @@ func (g *Group) run(ctx context.Context) {
 		g.evalIterationFunc(ctx, g, evalTimestamp)
 	}
 
-	now := time.Now()
-	g.RestoreForState(now)
-	g.metrics.GroupLastRestoreDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(time.Since(now).Seconds())
+	restoreStartTime := time.Now()
+	g.RestoreForState(restoreStartTime)
+	totalRestoreTimeSeconds := time.Since(restoreStartTime).Seconds()
+	g.metrics.GroupLastRestoreDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(totalRestoreTimeSeconds)
+	level.Debug(g.logger).Log("msg", "'for' state restoration completed", "duration_seconds", totalRestoreTimeSeconds)
 	g.shouldRestore = false
 }
 

From d672eda97949aabc57d151705f4cdbc847256626 Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Wed, 24 Apr 2024 14:31:18 +0100
Subject: [PATCH 011/112] Add a changelog entry

Signed-off-by: gotjosh
---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0afd8d702..23d2c89da 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## unreleased
 
 * [CHANGE] TSDB: Fix the predicate checking for blocks which are beyond the retention period to include the ones right at the retention boundary. #9633
+* [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974
 
 ## 2.51.2 / 2024-04-09

From 5beb2fe0051fb0ea04e32ab0e0b8bdac86d3ae75 Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Wed, 24 Apr 2024 15:24:35 +0100
Subject: [PATCH 012/112] Improve the metric description

Signed-off-by: gotjosh
---
 rules/group.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rules/group.go b/rules/group.go
index 27be4b1f4..987136a00 100644
--- a/rules/group.go
+++ b/rules/group.go
@@ -874,7 +874,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics {
 			prometheus.GaugeOpts{
 				Namespace: namespace,
 				Name:      "rule_group_last_restore_duration_seconds",
-				Help:      "The duration of the last rule group restoration.",
+				Help:      "The duration of the last alert rules alerts restoration using the `ALERTS_FOR_STATE` series.",
 			},
 			[]string{"rule_group"},
 		),

From d15869af32f918b9a958c3a73144f4834c935a75 Mon Sep 17 00:00:00 2001
From: Alan Protasio
Date: Wed, 24 Apr 2024 07:41:33 -0700
Subject: [PATCH 013/112] Avoid creating new slices for labels values on
 postings for matchers (#13958)

* Avoid creating new slices for labels values on postings for matchers

Signed-off-by: alanprot

* refactor

Signed-off-by: alanprot

---------

Signed-off-by: alanprot
---
 tsdb/querier.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tsdb/querier.go b/tsdb/querier.go
index 8ebedfe52..a6763e996 100644
--- a/tsdb/querier.go
+++ b/tsdb/querier.go
@@ -331,7 +331,7 @@ func postingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Matcher)
 		return nil, err
 	}
 
-	var res []string
+	res := vals[:0]
 	for _, val := range vals {
 		if m.Matches(val) {
 			res = append(res, val)
@@ -368,7 +368,7 @@ func inversePostingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Ma
 		return nil, err
 	}
 
-	var res []string
+	res := vals[:0]
 	// If the inverse match is ="", we just want all the values.
 	if m.Type == labels.MatchEqual && m.Value == "" {
 		res = vals

From 4daaa59c081fc63bcdf3d73e77babeb6f5494a53 Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Tue, 23 Apr 2024 19:40:10 +0100
Subject: [PATCH 014/112] Rule Manager: Only query once per alert rule when
 restoring alert state

Prometheus restores alert state between restarts and updates. For each
rule, it looks at the alerts that are meant to be active and then
queries the `ALERTS_FOR_STATE` series for _each_ alert within the
rules. If the alert rule has 120 instances (or series) it'll execute
the same query with slightly different labels.

This PR changes the approach so that we only query once per alert rule
and then match the corresponding alert that we're about to restore
against the series-set. While the approach might use a bit more memory
at start-up (if even?) the restore process is only run once per restart
so I'd consider this a big win.

This builds on top of #13974

Signed-off-by: gotjosh
---
 rules/alerting.go | 32 +++++++++++++-------------------
 rules/group.go    | 33 +++++++++++++++++++++++----------
 2 files changed, 36 insertions(+), 29 deletions(-)

diff --git a/rules/alerting.go b/rules/alerting.go
index 50c67fa2d..1bcf0a034 100644
--- a/rules/alerting.go
+++ b/rules/alerting.go
@@ -246,13 +246,16 @@ func (r *AlertingRule) sample(alert *Alert, ts time.Time) promql.Sample {
 	return s
 }
 
-// forStateSample returns the sample for ALERTS_FOR_STATE.
+// forStateSample returns a promql.Sample with the rule labels, `ALERTS_FOR_STATE` as the metric name and the rule name as the `alertname` label.
+// Optionally, if an alert is provided it'll copy the labels of the alert into the sample labels.
 func (r *AlertingRule) forStateSample(alert *Alert, ts time.Time, v float64) promql.Sample {
 	lb := labels.NewBuilder(r.labels)
 
-	alert.Labels.Range(func(l labels.Label) {
-		lb.Set(l.Name, l.Value)
-	})
+	if alert != nil {
+		alert.Labels.Range(func(l labels.Label) {
+			lb.Set(l.Name, l.Value)
+		})
+	}
 
 	lb.Set(labels.MetricName, alertForStateMetricName)
 	lb.Set(labels.AlertName, r.name)
@@ -265,9 +268,11 @@ func (r *AlertingRule) forStateSample(alert *Alert, ts time.Time, v float64) pro
 	return s
 }
 
-// QueryforStateSeries returns the series for ALERTS_FOR_STATE.
-func (r *AlertingRule) QueryforStateSeries(ctx context.Context, alert *Alert, q storage.Querier) (storage.Series, error) {
-	smpl := r.forStateSample(alert, time.Now(), 0)
+// QueryforStateSeries returns the series for ALERTS_FOR_STATE of the alert rule.
+func (r *AlertingRule) QueryforStateSeries(ctx context.Context, q storage.Querier) (storage.SeriesSet, error) {
+	// We use a sample to ease the building of matchers.
+	// Don't provide an alert as we want matchers that match all series for the alert rule.
+	smpl := r.forStateSample(nil, time.Now(), 0)
 	var matchers []*labels.Matcher
 	smpl.Metric.Range(func(l labels.Label) {
 		mt, err := labels.NewMatcher(labels.MatchEqual, l.Name, l.Value)
@@ -278,18 +283,7 @@ func (r *AlertingRule) QueryforStateSeries(ctx context.Context, alert *Alert, q
 	})
 
 	sset := q.Select(ctx, false, nil, matchers...)
-	var s storage.Series
-	for sset.Next() {
-		// Query assures that smpl.Metric is included in sset.At().Labels(),
-		// hence just checking the length would act like equality.
-		// (This is faster than calling labels.Compare again as we already have some info).
-		if sset.At().Labels().Len() == len(matchers) {
-			s = sset.At()
-			break
-		}
-	}
-
-	return s, sset.Err()
+	return sset, sset.Err()
 }
 
 // SetEvaluationDuration updates evaluationDuration to the duration it took to evaluate the rule on its last evaluation.

diff --git a/rules/group.go b/rules/group.go
index 987136a00..81f7b1df2 100644
--- a/rules/group.go
+++ b/rules/group.go
@@ -664,19 +664,32 @@ func (g *Group) RestoreForState(ts time.Time) {
 			continue
 		}
 
+		sset, err := alertRule.QueryforStateSeries(g.opts.Context, q)
+		if err != nil {
+			level.Error(g.logger).Log(
+				"msg", "Failed to restore 'for' state",
+				labels.AlertName, alertRule.Name(),
+				"stage", "Select",
+				"err", err,
+			)
+			continue
+		}
+
+		// No results for this alert rule.
+		if err == nil {
+			level.Debug(g.logger).Log("msg", "Failed to find a series to restore the 'for' state", labels.AlertName, alertRule.Name())
+			continue
+		}
+
 		alertRule.ForEachActiveAlert(func(a *Alert) {
 			var s storage.Series
 
-			s, err := alertRule.QueryforStateSeries(g.opts.Context, a, q)
-			if err != nil {
-				// Querier Warnings are ignored. We do not care unless we have an error.
-				level.Error(g.logger).Log(
-					"msg", "Failed to restore 'for' state",
-					labels.AlertName, alertRule.Name(),
-					"stage", "Select",
-					"err", err,
-				)
-				return
+			// Find the series for the given alert from the set.
+			for sset.Next() {
+				if sset.At().Labels().Hash() == a.Labels.Hash() {
+					s = sset.At()
+					break
+				}
 			}
 
 			if s == nil {

From e6dcbd2e26b73c6e4e4e0b8bc89c3d33f7de05c3 Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Tue, 23 Apr 2024 19:49:07 +0100
Subject: [PATCH 015/112] bug: nil check against the series set not errors

Signed-off-by: gotjosh
---
 rules/alerting.go | 2 +-
 rules/group.go    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rules/alerting.go b/rules/alerting.go
index 1bcf0a034..2cadd3ac5 100644
--- a/rules/alerting.go
+++ b/rules/alerting.go
@@ -281,8 +281,8 @@ func (r *AlertingRule) QueryforStateSeries(ctx context.Context, q storage.Querie
 		}
 		matchers = append(matchers, mt)
 	})
-	sset := q.Select(ctx, false, nil, matchers...)
 
+	sset := q.Select(ctx, false, nil, matchers...)
 	return sset, sset.Err()
 }
 

diff --git a/rules/group.go b/rules/group.go
index 81f7b1df2..7afeaf96e 100644
--- a/rules/group.go
+++ b/rules/group.go
@@ -676,7 +676,7 @@ func (g *Group) RestoreForState(ts time.Time) {
 		}
 
 		// No results for this alert rule.
-		if err == nil {
+		if sset == nil {
 			level.Debug(g.logger).Log("msg", "Failed to find a series to restore the 'for' state", labels.AlertName, alertRule.Name())
 			continue
 		}

From 276201598c18b0ddb62defb9badebadc000c4e4a Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Wed, 24 Apr 2024 12:27:35 +0100
Subject: [PATCH 016/112] Fix tests and a bug with the series lookup logic.

Signed-off-by: gotjosh
---
 rules/alerting_test.go |  26 ++++---
 rules/group.go         |  17 ++---
 rules/manager_test.go  | 162 +++++++++++++++++++++--------------------
 3 files changed, 105 insertions(+), 100 deletions(-)

diff --git a/rules/alerting_test.go b/rules/alerting_test.go
index ddfe345ef..366297b4a 100644
--- a/rules/alerting_test.go
+++ b/rules/alerting_test.go
@@ -710,19 +710,21 @@ func TestQueryForStateSeries(t *testing.T) {
 		labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
 	)
 
-	alert := &Alert{
-		State:       0,
-		Labels:      labels.EmptyLabels(),
-		Annotations: labels.EmptyLabels(),
-		Value:       0,
-		ActiveAt:    time.Time{},
-		FiredAt:     time.Time{},
-		ResolvedAt:  time.Time{},
-		LastSentAt:  time.Time{},
-		ValidUntil:  time.Time{},
-	}
+	sample := rule.forStateSample(nil, time.Time{}, 0)
+
+	var matchersCount int
+	sample.Metric.Range(func(l labels.Label) {
+		matchersCount++
+	})
 
-	series, err := rule.QueryforStateSeries(context.Background(), alert, querier)
+	seriesSet, err := rule.QueryforStateSeries(context.Background(), querier)
+
+	var series storage.Series
+	for seriesSet.Next() {
+		if seriesSet.At().Labels().Len() == matchersCount {
+			series = seriesSet.At()
+			break
+		}
+	}
 
 	require.Equal(t, tst.expectedSeries, series)
 	require.Equal(t, tst.expectedError, err)

diff --git a/rules/group.go b/rules/group.go
index 7afeaf96e..411ca68e2 100644
--- a/rules/group.go
+++ b/rules/group.go
@@ -681,21 +681,18 @@ func (g *Group) RestoreForState(ts time.Time) {
 			continue
 		}
 
+		result := map[uint64]storage.Series{}
+		for sset.Next() {
+			result[sset.At().Labels().DropMetricName().Hash()] = sset.At()
+		}
+
 		alertRule.ForEachActiveAlert(func(a *Alert) {
 			var s storage.Series
 
-			// Find the series for the given alert from the set.
-			for sset.Next() {
-				if sset.At().Labels().Hash() == a.Labels.Hash() {
-					s = sset.At()
-					break
-				}
-			}
-
-			if s == nil {
+			s, ok := result[a.Labels.Hash()]
+			if !ok {
 				return
 			}
 
-			// Series found for the 'for' state.
 			var t int64
 			var v float64

diff --git a/rules/manager_test.go b/rules/manager_test.go
index 50ab6b861..a3bd335d1 100644
--- a/rules/manager_test.go
+++ b/rules/manager_test.go
@@ -407,6 +407,7 @@ func TestForStateRestore(t *testing.T) {
 	// Prometheus goes down here. We create new rules and groups.
 	type testInput struct {
+		name            string
 		restoreDuration time.Duration
 		alerts          []*Alert
 
@@ -414,105 +415,110 @@ func TestForStateRestore(t *testing.T) {
 		noRestore    bool
 		gracePeriod  bool
 		downDuration time.Duration
+		before       func()
 	}
 
 	tests := []testInput{
 		{
-			// Normal restore (alerts were not firing).
+			name:            "normal restore (alerts were not firing)",
 			restoreDuration: 15 * time.Minute,
 			alerts:          rule.ActiveAlerts(),
 			downDuration:    10 * time.Minute,
 		},
 		{
-			// Testing Outage Tolerance.
+			name:            "outage tolerance",
 			restoreDuration: 40 * time.Minute,
 			noRestore:       true,
 			num:             2,
 		},
 		{
-			// No active alerts.
+			name:            "no active alerts",
 			restoreDuration: 50 * time.Minute,
 			alerts:          []*Alert{},
 		},
+		{
+			name:            "test the grace period",
+			restoreDuration: 25 * time.Minute,
+			alerts:          []*Alert{},
+			gracePeriod:     true,
+			before: func() {
+				for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} {
+					evalTime := baseTime.Add(duration)
+					group.Eval(context.TODO(), evalTime)
+				}
+			},
+			num: 2,
+		},
 	}
 
-	testFunc := func(tst testInput) {
-		newRule := NewAlertingRule(
-			"HTTPRequestRateLow",
-			expr,
-			alertForDuration,
-			0,
-			labels.FromStrings("severity", "critical"),
-			labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil,
-		)
-		newGroup := NewGroup(GroupOptions{
-			Name:          "default",
-			Interval:      time.Second,
-			Rules:         []Rule{newRule},
-			ShouldRestore: true,
-			Opts:          opts,
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if tt.before != nil {
+				tt.before()
+			}
+
+			newRule := NewAlertingRule(
+				"HTTPRequestRateLow",
+				expr,
+				alertForDuration,
+				0,
+				labels.FromStrings("severity", "critical"),
+				labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil,
+			)
+			newGroup := NewGroup(GroupOptions{
+				Name:          "default",
+				Interval:      time.Second,
+				Rules:         []Rule{newRule},
+				ShouldRestore: true,
+				Opts:          opts,
+			})
+
+			newGroups := make(map[string]*Group)
+			newGroups["default;"] = newGroup
+
+			restoreTime := baseTime.Add(tt.restoreDuration)
+			// First eval before restoration.
+			newGroup.Eval(context.TODO(), restoreTime)
+			// Restore happens here.
+			newGroup.RestoreForState(restoreTime)
+
+			got := newRule.ActiveAlerts()
+			for _, aa := range got {
+				require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels)
+			}
+			sort.Slice(got, func(i, j int) bool {
+				return labels.Compare(got[i].Labels, got[j].Labels) < 0
+			})
+
+			// Checking if we have restored it correctly.
+			switch {
+			case tt.noRestore:
+				require.Len(t, got, tt.num)
+				for _, e := range got {
+					require.Equal(t, e.ActiveAt, restoreTime)
+				}
+			case tt.gracePeriod:
+
+				require.Len(t, got, tt.num)
+				for _, e := range got {
+					require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime))
+				}
+			default:
+				exp := tt.alerts
+				require.Equal(t, len(exp), len(got))
+				sortAlerts(exp)
+				sortAlerts(got)
+				for i, e := range exp {
+					require.Equal(t, e.Labels, got[i].Labels)
+
+					// Difference in time should be within 1e6 ns, i.e. 1ms
+					// (due to conversion between ns & ms, float64 & int64).
+					activeAtDiff := float64(e.ActiveAt.Unix() + int64(tt.downDuration/time.Second) - got[i].ActiveAt.Unix())
+					require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong")
+				}
+			}
+		})
-
-		newGroups := make(map[string]*Group)
-		newGroups["default;"] = newGroup
-
-		restoreTime := baseTime.Add(tst.restoreDuration)
-		// First eval before restoration.
-		newGroup.Eval(context.TODO(), restoreTime)
-		// Restore happens here.
-		newGroup.RestoreForState(restoreTime)
-
-		got := newRule.ActiveAlerts()
-		for _, aa := range got {
-			require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels)
-		}
-		sort.Slice(got, func(i, j int) bool {
-			return labels.Compare(got[i].Labels, got[j].Labels) < 0
-		})
-
-		// Checking if we have restored it correctly.
-		switch {
-		case tst.noRestore:
-			require.Len(t, got, tst.num)
-			for _, e := range got {
-				require.Equal(t, e.ActiveAt, restoreTime)
-			}
-		case tst.gracePeriod:
-			require.Len(t, got, tst.num)
-			for _, e := range got {
-				require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime))
-			}
-		default:
-			exp := tst.alerts
-			require.Equal(t, len(exp), len(got))
-			sortAlerts(exp)
-			sortAlerts(got)
-			for i, e := range exp {
-				require.Equal(t, e.Labels, got[i].Labels)
-
-				// Difference in time should be within 1e6 ns, i.e. 1ms
-				// (due to conversion between ns & ms, float64 & int64).
-				activeAtDiff := float64(e.ActiveAt.Unix() + int64(tst.downDuration/time.Second) - got[i].ActiveAt.Unix())
-				require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong")
-			}
-		}
 	}
-
-	for _, tst := range tests {
-		testFunc(tst)
-	}
-
-	// Testing the grace period.
-	for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} {
-		evalTime := baseTime.Add(duration)
-		group.Eval(context.TODO(), evalTime)
-	}
-	testFunc(testInput{
-		restoreDuration: 25 * time.Minute,
-		alerts:          []*Alert{},
-		gracePeriod:     true,
-		num:             2,
-	})
 }
 
 func TestStaleness(t *testing.T) {

From fa75985c1c4695b077cefdbb9e6fd9ec73ee8c0e Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Wed, 24 Apr 2024 14:04:16 +0100
Subject: [PATCH 017/112] Use the string representation of the labels instead
 of the hash

Signed-off-by: gotjosh
---
 rules/group.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rules/group.go b/rules/group.go
index 411ca68e2..4acce6e60 100644
--- a/rules/group.go
+++ b/rules/group.go
@@ -681,15 +681,15 @@ func (g *Group) RestoreForState(ts time.Time) {
 			continue
 		}
 
-		result := map[uint64]storage.Series{}
+		result := map[string]storage.Series{}
 		for sset.Next() {
-			result[sset.At().Labels().DropMetricName().Hash()] = sset.At()
+			result[sset.At().Labels().DropMetricName().String()] = sset.At()
 		}
 
 		alertRule.ForEachActiveAlert(func(a *Alert) {
 			var s storage.Series
 
-			s, ok := result[a.Labels.Hash()]
+			s, ok := result[a.Labels.String()]
 			if !ok {
 				return
 			}

From 6cfc58430829895dbf10e749da9bddd8ae8c25f4 Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Wed, 24 Apr 2024 19:02:47 +0100
Subject: [PATCH 018/112] - Add a changelog entry - Improve variable name of
 the map produced by the series set

Signed-off-by: gotjosh
---
 CHANGELOG.md   | 1 +
 rules/group.go | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 23d2c89da..58293351c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## unreleased
 
 * [CHANGE] TSDB: Fix the predicate checking for blocks which are beyond the retention period to include the ones right at the retention boundary. #9633
+* [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980
 * [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974
 
 ## 2.51.2 / 2024-04-09

diff --git a/rules/group.go b/rules/group.go
index 4acce6e60..c4331a465 100644
--- a/rules/group.go
+++ b/rules/group.go
@@ -681,15 +681,15 @@ func (g *Group) RestoreForState(ts time.Time) {
 			continue
 		}
 
-		result := map[string]storage.Series{}
+		seriesByLabels := map[string]storage.Series{}
 		for sset.Next() {
-			result[sset.At().Labels().DropMetricName().String()] = sset.At()
+			seriesByLabels[sset.At().Labels().DropMetricName().String()] = sset.At()
 		}
 
 		alertRule.ForEachActiveAlert(func(a *Alert) {
 			var s storage.Series
 
-			s, ok := result[a.Labels.String()]
+			s, ok := seriesByLabels[a.Labels.String()]
 			if !ok {
 				return
 			}

From 2de2fee0354eaab35e8dbca8388dd6d3d2158220 Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Wed, 24 Apr 2024 19:10:34 +0100
Subject: [PATCH 019/112] Allocate the result map for the series set beforehand
 with a size hint.

Signed-off-by: gotjosh
---
 rules/alerting.go      |  7 +++++++
 rules/alerting_test.go | 19 +++++++++++++++++++
 rules/group.go         |  3 ++-
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/rules/alerting.go b/rules/alerting.go
index 2cadd3ac5..e357cc615 100644
--- a/rules/alerting.go
+++ b/rules/alerting.go
@@ -542,6 +542,13 @@ func (r *AlertingRule) ForEachActiveAlert(f func(*Alert)) {
 	}
 }
 
+func (r *AlertingRule) ActiveAlertsCount() int {
+	r.activeMtx.Lock()
+	defer r.activeMtx.Unlock()
+
+	return len(r.active)
+}
+
 func (r *AlertingRule) sendAlerts(ctx context.Context, ts time.Time, resendDelay, interval time.Duration, notifyFunc NotifyFunc) {
 	alerts := []*Alert{}
 	r.ForEachActiveAlert(func(alert *Alert) {

diff --git a/rules/alerting_test.go b/rules/alerting_test.go
index 366297b4a..78f6d6715 100644
--- a/rules/alerting_test.go
+++ b/rules/alerting_test.go
@@ -1027,3 +1027,22 @@ func TestAlertingRule_SetNoDependencyRules(t *testing.T) {
 	rule.SetNoDependencyRules(true)
 	require.True(t, rule.NoDependencyRules())
 }
+
+func TestAlertingRule_ActiveAlertsCount(t *testing.T) {
+	rule := NewAlertingRule(
+		"TestRule",
+		nil,
+		time.Minute,
+		0,
+		labels.FromStrings("severity", "critical"),
+		labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
+	)
+
+	// Set an active alert.
+	lbls := labels.FromStrings("a1", "1")
+	h := lbls.Hash()
+	al := &Alert{State: StateFiring, Labels: lbls, ActiveAt: time.Now()}
+	rule.active[h] = al
+
+	require.Equal(t, 1, t rule.ActiveAlertsCount())
+}

diff --git a/rules/group.go b/rules/group.go
index c4331a465..28a0ff6e1 100644
--- a/rules/group.go
+++ b/rules/group.go
@@ -681,7 +681,8 @@ func (g *Group) RestoreForState(ts time.Time) {
 			continue
 		}
 
-		seriesByLabels := map[string]storage.Series{}
+		// While not technically the same number of series we expect, it's as good of an approximation as any.
+		seriesByLabels := make(map[string]storage.Series, alertRule.ActiveAlertsCount())
 		for sset.Next() {
 			seriesByLabels[sset.At().Labels().DropMetricName().String()] = sset.At()
 		}

From cc2207148edf9c348837b2631e4adf6d85a8819b Mon Sep 17 00:00:00 2001
From: gotjosh
Date: Wed, 24 Apr 2024 19:20:57 +0100
Subject: [PATCH 020/112] fix typo

Signed-off-by: gotjosh
---
 rules/alerting_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rules/alerting_test.go b/rules/alerting_test.go
index 78f6d6715..a3f97b2a4 100644
--- a/rules/alerting_test.go
+++ b/rules/alerting_test.go
@@ -1044,5 +1044,5 @@ func TestAlertingRule_ActiveAlertsCount(t *testing.T) {
 	al := &Alert{State: StateFiring, Labels: lbls, ActiveAt: time.Now()}
 	rule.active[h] = al
 
-	require.Equal(t, 1, t rule.ActiveAlertsCount())
+	require.Equal(t, 1, rule.ActiveAlertsCount())
 }

From 7aacef9b42a275f7ad8f60a707abb6de2a5d1448 Mon Sep 17 00:00:00 2001
From: Arthur Silva Sens
Date: Wed, 24 Apr 2024 10:53:54 -0300
Subject: [PATCH 021/112] bugfix: Decouple native histogram ingestion and
 protobuf parsing

Up until this point, if a scrape was done with the protobuf format
Prometheus would always try to ingest native histograms even with the
feature flag disabled. This causes problems with other feature-flags
that depend on the protobuf format, like
'created-timestamp-zero-ingestion'.

This commit decouples native histogram parsing from ingestion, making
sure ingestion only happens when the 'native-histogram' feature-flag is
enabled.

Signed-off-by: Arthur Silva Sens
---
 cmd/prometheus/main.go |  1 +
 scrape/manager.go      |  2 ++
 scrape/scrape.go       | 64 +++++++++++++++++++++++-------------------
 scrape/scrape_test.go  | 28 ++++++++++++------
 4 files changed, 57 insertions(+), 38 deletions(-)

diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go
index 0e15d5ca5..8218ffb18 100644
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@@ -217,6 +217,7 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
 		level.Info(logger).Log("msg", "Experimental PromQL functions enabled.")
 	case "native-histograms":
 		c.tsdb.EnableNativeHistograms = true
+		c.scrape.EnableNativeHistogramsIngestion = true
 		// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
 		config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
 		config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols

diff --git a/scrape/manager.go b/scrape/manager.go
index a7a8b828e..cb92db5a8 100644
--- a/scrape/manager.go
+++ b/scrape/manager.go
@@ -81,6 +81,8 @@ type Options struct {
 	// Option to enable the ingestion of the created timestamp as a synthetic zero sample.
 	// See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md
 	EnableCreatedTimestampZeroIngestion bool
+	// Option to enable the ingestion of native histograms.
+	EnableNativeHistogramsIngestion bool
 
 	// Optional HTTP client options to use when scraping.
 	HTTPClientOptions []config_util.HTTPClientOption

diff --git a/scrape/scrape.go b/scrape/scrape.go
index 4bbeab57a..c285f05e3 100644
--- a/scrape/scrape.go
+++ b/scrape/scrape.go
@@ -178,6 +178,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed
 			opts.interval,
 			opts.timeout,
 			opts.scrapeClassicHistograms,
+			options.EnableNativeHistogramsIngestion,
 			options.EnableCreatedTimestampZeroIngestion,
 			options.ExtraMetrics,
 			options.EnableMetadataStorage,
@@ -827,7 +828,10 @@ type scrapeLoop struct {
 	interval                time.Duration
 	timeout                 time.Duration
 	scrapeClassicHistograms bool
-	enableCTZeroIngestion   bool
+
+	// Feature flagged options.
+	enableNativeHistogramIngestion bool
+	enableCTZeroIngestion          bool
 
 	appender    func(ctx context.Context) storage.Appender
 	symbolTable *labels.SymbolTable
@@ -1123,6 +1127,7 @@ func newScrapeLoop(ctx context.Context,
 	interval time.Duration,
 	timeout time.Duration,
 	scrapeClassicHistograms bool,
+	enableNativeHistogramIngestion bool,
 	enableCTZeroIngestion bool,
 	reportExtraMetrics bool,
 	appendMetadataToWAL bool,
@@ -1153,33 +1158,34 @@ func newScrapeLoop(ctx context.Context,
 	}
 
 	sl := &scrapeLoop{
-		scraper:                  sc,
-		buffers:                  buffers,
-		cache:                    cache,
-		appender:                 appender,
-		symbolTable:              symbolTable,
-		sampleMutator:            sampleMutator,
-		reportSampleMutator:      reportSampleMutator,
-		stopped:                  make(chan struct{}),
-		offsetSeed:               offsetSeed,
-		l:                        l,
-		parentCtx:                ctx,
-		appenderCtx:              appenderCtx,
-		honorTimestamps:          honorTimestamps,
-		trackTimestampsStaleness: trackTimestampsStaleness,
-		enableCompression:        enableCompression,
-		sampleLimit:              sampleLimit,
-		bucketLimit:              bucketLimit,
-		maxSchema:                maxSchema,
-		labelLimits:              labelLimits,
-		interval:                 interval,
-		timeout:                  timeout,
-		scrapeClassicHistograms:  scrapeClassicHistograms,
-		enableCTZeroIngestion:    enableCTZeroIngestion,
-		reportExtraMetrics:       reportExtraMetrics,
-		appendMetadataToWAL:      appendMetadataToWAL,
-		metrics:                  metrics,
-		skipOffsetting:           skipOffsetting,
+		scraper:                        sc,
+		buffers:                        buffers,
+		cache:                          cache,
+		appender:                       appender,
+		symbolTable:                    symbolTable,
+		sampleMutator:                  sampleMutator,
+		reportSampleMutator:            reportSampleMutator,
+		stopped:                        make(chan struct{}),
+		offsetSeed:                     offsetSeed,
+		l:                              l,
+		parentCtx:                      ctx,
+		appenderCtx:                    appenderCtx,
+		honorTimestamps:                honorTimestamps,
+		trackTimestampsStaleness:       trackTimestampsStaleness,
+		enableCompression:              enableCompression,
+		sampleLimit:                    sampleLimit,
+		bucketLimit:                    bucketLimit,
+		maxSchema:                      maxSchema,
+		labelLimits:                    labelLimits,
+		interval:                       interval,
+		timeout:                        timeout,
+		scrapeClassicHistograms:        scrapeClassicHistograms,
+		enableNativeHistogramIngestion: enableNativeHistogramIngestion,
+		enableCTZeroIngestion:          enableCTZeroIngestion,
+		reportExtraMetrics:             reportExtraMetrics,
+		appendMetadataToWAL:            appendMetadataToWAL,
+		metrics:                        metrics,
+		skipOffsetting:                 skipOffsetting,
 	}
 	sl.ctx, sl.cancel = context.WithCancel(ctx)
 
@@ -1627,7 +1633,7 @@ loop:
 			}
 		}
 
-		if isHistogram {
+		if isHistogram && sl.enableNativeHistogramIngestion {
 			if h != nil {
 				ref, err = app.AppendHistogram(ref, lset, t, h, nil)
 			} else {

diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go
index 20b21936b..51bd377e4 100644
--- a/scrape/scrape_test.go
+++ b/scrape/scrape_test.go
@@ -678,6 +678,7 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app
 		false,
 		false,
 		false,
+		false,
 		nil,
 		false,
 		newTestScrapeMetrics(t),
@@ -819,6 +820,7 @@ func TestScrapeLoopRun(t *testing.T) {
 		false,
 		false,
 		false,
+		false,
 		nil,
 		false,
 		scrapeMetrics,
@@ -962,6 +964,7 @@ func TestScrapeLoopMetadata(t *testing.T) {
 		false,
 		false,
 		false,
+		false,
 		nil,
 		false,
 		scrapeMetrics,
@@ -1571,6 +1574,7 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) {
 	app := &bucketLimitAppender{Appender: resApp, limit: 2}
 
 	sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0)
+	sl.enableNativeHistogramIngestion = true
 	sl.sampleMutator = func(l labels.Labels) labels.Labels {
 		if l.Has("deleteme") {
 			return labels.EmptyLabels()
@@ -1797,14 +1801,15 @@ func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) {
 
 func TestScrapeLoopAppendExemplar(t *testing.T) {
 	tests := []struct {
-		title                   string
-		scrapeClassicHistograms bool
-		scrapeText              string
-		contentType             string
-		discoveryLabels         []string
-		floats                  []floatSample
-		histograms              []histogramSample
-		exemplars               []exemplar.Exemplar
+		title                           string
+		scrapeClassicHistograms        bool
+		enableNativeHistogramsIngestion bool
+		scrapeText                      string
+		contentType                     string
+		discoveryLabels                 []string
+		floats                          []floatSample
+		histograms                      []histogramSample
+		exemplars                       []exemplar.Exemplar
 	}{
 		{
 			title: "Metric without exemplars",
@@ -1862,6 +1867,8 @@ metric_total{n="2"} 2 # {t="2"} 2.0 20000
 		},
 		{
 			title: "Native histogram with three exemplars",
+
+			enableNativeHistogramsIngestion: true,
 			scrapeText: `name: "test_histogram"
 help: "Test histogram with many buckets removed to keep it manageable in size."
 type: HISTOGRAM
@@ -1976,6 +1983,8 @@ metric: <
 		},
 		{
 			title: "Native histogram with three exemplars scraped as classic histogram",
+
+			enableNativeHistogramsIngestion: true,
 			scrapeText: `name: "test_histogram"
 help: "Test histogram with many buckets removed to keep it manageable in size."
 type: HISTOGRAM
@@ -2115,6 +2124,7 @@ metric: <
 	}
 
 	sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0)
+	sl.enableNativeHistogramIngestion = test.enableNativeHistogramsIngestion
 	sl.sampleMutator = func(l labels.Labels) labels.Labels {
 		return mutateSampleLabels(l, discoveryLabels, false, nil)
 	}
@@ -3710,7 +3720,7 @@ scrape_configs:
 	s.DB.EnableNativeHistograms()
 	reg := prometheus.NewRegistry()
 
-	mng, err := NewManager(nil, nil, s, reg)
+	mng, err := NewManager(&Options{EnableNativeHistogramsIngestion: true}, nil, s, reg)
 	require.NoError(t, err)
 	cfg, err := config.Load(configStr, false, log.NewNopLogger())
 	require.NoError(t, err)

From dde2e5eb73baac260f41e4b808043ba2aa309810 Mon Sep 17 00:00:00 2001
From: George Robinson
Date: Thu, 25 Apr 2024 13:18:50 +0100
Subject: [PATCH 022/112] Improve comments around resending resolved alerts
 (#13990)

Signed-off-by: George Robinson
---
 rules/alerting.go | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/rules/alerting.go b/rules/alerting.go
index 50c67fa2d..edcdfe5e0 100644
--- a/rules/alerting.go
+++ b/rules/alerting.go
@@ -457,8 +457,17 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc,
 			}
 		}
 
-		// If the alert was previously firing, keep it around for a given
-		// retention time so it is reported as resolved to the AlertManager.
+		// If the alert is resolved (was firing but is now inactive) keep it for
+		// at least the retention period. This is important for a number of reasons:
+		//
+		// 1. It allows for Prometheus to be more resilient to network issues that
+		//    would otherwise prevent a resolved alert from being reported as resolved
+		//    to Alertmanager.
+		//
+		// 2. It helps reduce the chance of resolved notifications being lost if
+		//    Alertmanager crashes or restarts between receiving the resolved alert
+		//    from Prometheus and sending the resolved notification. This tends to
+		//    occur for routes with large Group intervals.
 		if a.State == StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > resolvedRetention) {
 			delete(r.active, fp)
 		}

From 31a4217784a1836a81d498e69e1d9ec2a82cf185 Mon Sep 17 00:00:00 2001
From: Stephen Heckler
Date: Thu, 25 Apr 2024 12:33:29 -0500
Subject: [PATCH 023/112] discovery(k8s): Only register client-go metrics
 adapters when needed

Previously the metrics adapters for client-go were registered in an
init function. This resulted in clobbering default metrics providers
when these packages are imported into an application that leverages the
default client-go metrics registry. Instead, let's only register these
adapters when requested.

Signed-off-by: Stephen Heckler
---
 discovery/metrics.go            | 10 ----------
 discovery/metrics_k8s_client.go |  8 ++++++++
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/discovery/metrics.go b/discovery/metrics.go
index e738331a1..356be1ddc 100644
--- a/discovery/metrics.go
+++ b/discovery/metrics.go
@@ -19,16 +19,6 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 )
 
-var (
-	clientGoRequestMetrics  = &clientGoRequestMetricAdapter{}
-	clientGoWorkloadMetrics = &clientGoWorkqueueMetricsProvider{}
-)
-
-func init() {
-	clientGoRequestMetrics.RegisterWithK8sGoClient()
-	clientGoWorkloadMetrics.RegisterWithK8sGoClient()
-}
-
 // Metrics to be used with a discovery manager.
 type Metrics struct {
 	FailedConfigs prometheus.Gauge

diff --git a/discovery/metrics_k8s_client.go b/discovery/metrics_k8s_client.go
index f16245684..c13ce5331 100644
--- a/discovery/metrics_k8s_client.go
+++ b/discovery/metrics_k8s_client.go
@@ -35,6 +35,11 @@ const (
 	workqueueMetricsNamespace = KubernetesMetricsNamespace + "_workqueue"
 )
 
+var (
+	clientGoRequestMetrics  = &clientGoRequestMetricAdapter{}
+	clientGoWorkloadMetrics = &clientGoWorkqueueMetricsProvider{}
+)
+
 var (
 	// Metrics for client-go's HTTP requests.
 	clientGoRequestResultMetricVec = prometheus.NewCounterVec(
@@ -135,6 +140,9 @@ func clientGoMetrics() []prometheus.Collector {
 }
 
 func RegisterK8sClientMetricsWithPrometheus(registerer prometheus.Registerer) error {
+	clientGoRequestMetrics.RegisterWithK8sGoClient()
+	clientGoWorkloadMetrics.RegisterWithK8sGoClient()
+
 	for _, collector := range clientGoMetrics() {
 		err := registerer.Register(collector)
 		if err != nil {

From 801314901c91faf9945637680eadf283485f1516 Mon Sep 17 00:00:00 2001
From: Nishant Singh
Date: Sat, 27 Apr 2024 13:50:41 +0530
Subject: [PATCH 024/112] Update docs/storage.md

Co-authored-by: Ayoub Mrini
Signed-off-by: Nishant Singh
---
 docs/storage.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/storage.md b/docs/storage.md
index aae16a170..21532119f 100644
--- a/docs/storage.md
+++ b/docs/storage.md
@@ -85,7 +85,7 @@ Prometheus has several flags that configure local storage. The most important ar
 
 - `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`.
 - `--storage.tsdb.retention.time`: How long to retain samples in storage. When this flag is
-  set it overrides `storage.tsdb.retention`. If neither this flag nor `storage.tsdb.retention`
+  set, it overrides `storage.tsdb.retention`. If neither this flag nor `storage.tsdb.retention`
   nor `storage.tsdb.retention.size` is set, the retention time defaults to `15d`.
Units Supported: y, w, d, h, m, s, ms. - `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain. From c8b23980c9f5b21ada8f9b7e26ea0e2220f7e587 Mon Sep 17 00:00:00 2001 From: Nishant Singh Date: Sat, 27 Apr 2024 13:50:50 +0530 Subject: [PATCH 025/112] Update docs/storage.md Co-authored-by: Ayoub Mrini Signed-off-by: Nishant Singh --- docs/storage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/storage.md b/docs/storage.md index 21532119f..46bb7210e 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -87,7 +87,7 @@ Prometheus has several flags that configure local storage. The most important ar - `--storage.tsdb.retention.time`: How long to retain samples in storage. When this flag is set, it overrides `storage.tsdb.retention`. If neither this flag nor `storage.tsdb.retention` nor `storage.tsdb.retention.size` is set, the retention time defaults to `15d`. - Units Supported: y, w, d, h, m, s, ms. + Supported units: y, w, d, h, m, s, ms. - `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain. The oldest data will be removed first. Defaults to `0` or disabled. Units supported: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Only From f7e923c3bb47071e9d810e31889e7cb3da133862 Mon Sep 17 00:00:00 2001 From: Heyoxe <32708033+Heyoxe@users.noreply.github.com> Date: Sat, 27 Apr 2024 16:01:30 +0200 Subject: [PATCH 026/112] fix(scaleway-sd): use public IPs if no private IP present (#13941) * fix(scaleway-sd): use public IPs if no private IP present * tests(scaleway-sd): add instance with routed public ip and no private ip --------- Signed-off-by: Heyoxe <32708033+Heyoxe@users.noreply.github.com> --- discovery/scaleway/instance.go | 9 +- discovery/scaleway/instance_test.go | 24 +++- discovery/scaleway/testdata/instance.json | 142 +++++++++++++++++++++- docs/configuration/configuration.md | 3 +- 4 files changed, 173 insertions(+), 5 deletions(-) diff --git a/discovery/scaleway/instance.go b/discovery/scaleway/instance.go index 9dd786c80..6540f06dc 100644 --- a/discovery/scaleway/instance.go +++ b/discovery/scaleway/instance.go @@ -174,20 +174,25 @@ func (d *instanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, labels[instanceTagsLabel] = model.LabelValue(tags) } + addr := "" if server.IPv6 != nil { labels[instancePublicIPv6Label] = model.LabelValue(server.IPv6.Address.String()) + addr = server.IPv6.Address.String() } if server.PublicIP != nil { labels[instancePublicIPv4Label] = model.LabelValue(server.PublicIP.Address.String()) + addr = server.PublicIP.Address.String() } if server.PrivateIP != nil { labels[instancePrivateIPv4Label] = model.LabelValue(*server.PrivateIP) + addr = *server.PrivateIP + } - addr := net.JoinHostPort(*server.PrivateIP, strconv.FormatUint(uint64(d.port), 10)) + if addr != "" { + addr := net.JoinHostPort(addr, strconv.FormatUint(uint64(d.port), 10)) labels[model.AddressLabel] = model.LabelValue(addr) - targets = append(targets, labels) } } diff --git a/discovery/scaleway/instance_test.go b/discovery/scaleway/instance_test.go index d2449d00c..ae70a9ed2 100644 --- a/discovery/scaleway/instance_test.go +++ b/discovery/scaleway/instance_test.go @@ -60,7 +60,7 @@ api_url: %s tg := tgs[0] require.NotNil(t, tg) require.NotNil(t, tg.Targets) - require.Len(t, tg.Targets, 2) + require.Len(t, tg.Targets, 3) for i, lbls := range []model.LabelSet{ { @@ -110,6 +110,28 @@ api_url: %s "__meta_scaleway_instance_type": "DEV1-S", 
"__meta_scaleway_instance_zone": "fr-par-1", }, + { + "__address__": "51.158.183.115:80", + "__meta_scaleway_instance_boot_type": "local", + "__meta_scaleway_instance_hostname": "routed-dualstack", + "__meta_scaleway_instance_id": "4904366a-7e26-4b65-b97b-6392c761247a", + "__meta_scaleway_instance_image_arch": "x86_64", + "__meta_scaleway_instance_image_id": "3e0a5b84-1d69-4993-8fa4-0d7df52d5160", + "__meta_scaleway_instance_image_name": "Ubuntu 22.04 Jammy Jellyfish", + "__meta_scaleway_instance_location_cluster_id": "19", + "__meta_scaleway_instance_location_hypervisor_id": "1201", + "__meta_scaleway_instance_location_node_id": "24", + "__meta_scaleway_instance_name": "routed-dualstack", + "__meta_scaleway_instance_organization_id": "20b3d507-96ac-454c-a795-bc731b46b12f", + "__meta_scaleway_instance_project_id": "20b3d507-96ac-454c-a795-bc731b46b12f", + "__meta_scaleway_instance_public_ipv4": "51.158.183.115", + "__meta_scaleway_instance_region": "nl-ams", + "__meta_scaleway_instance_security_group_id": "984414da-9fc2-49c0-a925-fed6266fe092", + "__meta_scaleway_instance_security_group_name": "Default security group", + "__meta_scaleway_instance_status": "running", + "__meta_scaleway_instance_type": "DEV1-S", + "__meta_scaleway_instance_zone": "nl-ams-1", + }, } { t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) { require.Equal(t, lbls, tg.Targets[i]) diff --git a/discovery/scaleway/testdata/instance.json b/discovery/scaleway/testdata/instance.json index f8d35b215..b433f7598 100644 --- a/discovery/scaleway/testdata/instance.json +++ b/discovery/scaleway/testdata/instance.json @@ -216,6 +216,146 @@ "placement_group": null, "private_nics": [], "zone": "fr-par-1" + }, + { + "id": "4904366a-7e26-4b65-b97b-6392c761247a", + "name": "routed-dualstack", + "arch": "x86_64", + "commercial_type": "DEV1-S", + "boot_type": "local", + "organization": "20b3d507-96ac-454c-a795-bc731b46b12f", + "project": "20b3d507-96ac-454c-a795-bc731b46b12f", + "hostname": "routed-dualstack", + "image": { + "id": "3e0a5b84-1d69-4993-8fa4-0d7df52d5160", + "name": "Ubuntu 22.04 Jammy Jellyfish", + "organization": "51b656e3-4865-41e8-adbc-0c45bdd780db", + "project": "51b656e3-4865-41e8-adbc-0c45bdd780db", + "root_volume": { + "id": "13d945b9-5e78-4f9d-8ac4-c4bc2fa7c31a", + "name": "Ubuntu 22.04 Jammy Jellyfish", + "volume_type": "unified", + "size": 10000000000 + }, + "extra_volumes": {}, + "public": true, + "arch": "x86_64", + "creation_date": "2024-02-22T15:52:56.037007+00:00", + "modification_date": "2024-02-22T15:52:56.037007+00:00", + "default_bootscript": null, + "from_server": null, + "state": "available", + "tags": [], + "zone": "nl-ams-1" + }, + "volumes": { + "0": { + "boot": false, + "id": "fe85c817-e67e-4e24-8f13-bde3e9f42620", + "name": "Ubuntu 22.04 Jammy Jellyfish", + "volume_type": "l_ssd", + "export_uri": null, + "organization": "20b3d507-96ac-454c-a795-bc731b46b12f", + "project": "20b3d507-96ac-454c-a795-bc731b46b12f", + "server": { + "id": "4904366a-7e26-4b65-b97b-6392c761247a", + "name": "routed-dualstack" + }, + "size": 20000000000, + "state": "available", + "creation_date": "2024-04-19T14:50:14.019739+00:00", + "modification_date": "2024-04-19T14:50:14.019739+00:00", + "tags": [], + "zone": "nl-ams-1" + } + }, + "tags": [], + "state": "running", + "protected": false, + "state_detail": "booted", + "public_ip": { + "id": "53f8f861-7a11-4b16-a4bc-fb8f4b4a11d0", + "address": "51.158.183.115", + "dynamic": false, + "gateway": "62.210.0.1", + "netmask": "32", + "family": "inet", + "provisioning_mode": 
"dhcp", + "tags": [], + "state": "attached", + "ipam_id": "ec3499ff-a664-49b7-818a-9fe4b95aee5e" + }, + "public_ips": [ + { + "id": "53f8f861-7a11-4b16-a4bc-fb8f4b4a11d0", + "address": "51.158.183.115", + "dynamic": false, + "gateway": "62.210.0.1", + "netmask": "32", + "family": "inet", + "provisioning_mode": "dhcp", + "tags": [], + "state": "attached", + "ipam_id": "ec3499ff-a664-49b7-818a-9fe4b95aee5e" + }, + { + "id": "f52a8c81-0875-4aee-b96e-eccfc6bec367", + "address": "2001:bc8:1640:1568:dc00:ff:fe21:91b", + "dynamic": false, + "gateway": "fe80::dc00:ff:fe21:91c", + "netmask": "64", + "family": "inet6", + "provisioning_mode": "slaac", + "tags": [], + "state": "attached", + "ipam_id": "40d1e6ea-e932-42f9-8acb-55398bec7ad6" + } + ], + "mac_address": "de:00:00:21:09:1b", + "routed_ip_enabled": true, + "ipv6": null, + "extra_networks": [], + "dynamic_ip_required": false, + "enable_ipv6": false, + "private_ip": null, + "creation_date": "2024-04-19T14:50:14.019739+00:00", + "modification_date": "2024-04-19T14:52:21.181670+00:00", + "bootscript": { + "id": "5a520dda-96d6-4ed2-acd1-1d526b6859fe", + "public": true, + "title": "x86_64 mainline 4.4.182 rev1", + "architecture": "x86_64", + "organization": "11111111-1111-4111-8111-111111111111", + "project": "11111111-1111-4111-8111-111111111111", + "kernel": "http://10.196.2.9/kernel/x86_64-mainline-lts-4.4-4.4.182-rev1/vmlinuz-4.4.182", + "dtb": "", + "initrd": "http://10.196.2.9/initrd/initrd-Linux-x86_64-v3.14.6.gz", + "bootcmdargs": "LINUX_COMMON scaleway boot=local nbd.max_part=16", + "default": true, + "zone": "nl-ams-1" + }, + "security_group": { + "id": "984414da-9fc2-49c0-a925-fed6266fe092", + "name": "Default security group" + }, + "location": { + "zone_id": "ams1", + "platform_id": "23", + "cluster_id": "19", + "hypervisor_id": "1201", + "node_id": "24" + }, + "maintenances": [], + "allowed_actions": [ + "poweroff", + "terminate", + "reboot", + "stop_in_place", + "backup" + ], + "placement_group": null, + "private_nics": [], + "zone": "nl-ams-1" } ] -} +} \ No newline at end of file diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 2f2e07a0c..a90defc78 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -2952,9 +2952,10 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_scaleway_instance_type`: commercial type of the server * `__meta_scaleway_instance_zone`: the zone of the server (ex: `fr-par-1`, complete list [here](https://developers.scaleway.com/en/products/instance/api/#introduction)) -This role uses the private IPv4 address by default. This can be +This role uses the first address it finds in the following order: private IPv4, public IPv4, public IPv6. This can be changed with relabeling, as demonstrated in [the Prometheus scaleway-sd configuration file](/documentation/examples/prometheus-scaleway.yml). +Should an instance have no address before relabeling, it will not be added to the target list and you will not be able to relabel it. 
#### Baremetal role From 99f9d32499e6b80b550252b55d430dc7dd171dbb Mon Sep 17 00:00:00 2001 From: Neeraj Gartia <80708727+NeerajGartia21@users.noreply.github.com> Date: Mon, 29 Apr 2024 14:44:01 +0530 Subject: [PATCH 027/112] UTF-8: updates UI parser to support UTF-8 characters (#13590) Signed-off-by: Neeraj Gartia --- .../src/complete/hybrid.test.ts | 48 ++++++ .../codemirror-promql/src/complete/hybrid.ts | 35 ++++- .../codemirror-promql/src/parser/matcher.ts | 82 +++++++--- .../src/parser/parser.test.ts | 143 ++++++++++++++++++ .../codemirror-promql/src/parser/parser.ts | 18 ++- web/ui/module/lezer-promql/src/promql.grammar | 16 +- .../module/lezer-promql/test/expression.txt | 105 ++++++++++--- 7 files changed, 396 insertions(+), 51 deletions(-) diff --git a/web/ui/module/codemirror-promql/src/complete/hybrid.test.ts b/web/ui/module/codemirror-promql/src/complete/hybrid.test.ts index 0f1a8b80a..7b20bfce3 100644 --- a/web/ui/module/codemirror-promql/src/complete/hybrid.test.ts +++ b/web/ui/module/codemirror-promql/src/complete/hybrid.test.ts @@ -251,6 +251,12 @@ describe('analyzeCompletion test', () => { pos: 11, // cursor is between the bracket after the string myL expectedContext: [{ kind: ContextKind.LabelName }], }, + { + title: 'continue to autocomplete QuotedLabelName in aggregate modifier', + expr: 'sum by ("myL")', + pos: 12, // cursor is between the bracket after the string myL + expectedContext: [{ kind: ContextKind.LabelName }], + }, { title: 'autocomplete labelName in a list', expr: 'sum by (myLabel1,)', @@ -263,6 +269,12 @@ describe('analyzeCompletion test', () => { pos: 23, // cursor is between the bracket after the string myLab expectedContext: [{ kind: ContextKind.LabelName }], }, + { + title: 'autocomplete labelName in a list 2', + expr: 'sum by ("myLabel1", "myLab")', + pos: 27, // cursor is between the bracket after the string myLab + expectedContext: [{ kind: ContextKind.LabelName }], + }, { title: 'autocomplete labelName associated to a metric', expr: 'metric_name{}', @@ -299,6 +311,12 @@ describe('analyzeCompletion test', () => { pos: 22, // cursor is between the bracket after the comma expectedContext: [{ kind: ContextKind.LabelName, metricName: '' }], }, + { + title: 'continue to autocomplete quoted labelName associated to a metric', + expr: '{"metric_"}', + pos: 10, // cursor is between the bracket after the string metric_ + expectedContext: [{ kind: ContextKind.MetricName, metricName: 'metric_' }], + }, { title: 'autocomplete the labelValue with metricName + labelName', expr: 'metric_name{labelName=""}', @@ -342,6 +360,30 @@ describe('analyzeCompletion test', () => { }, ], }, + { + title: 'autocomplete the labelValue with metricName + quoted labelName', + expr: 'metric_name{labelName="labelValue", "labelName"!=""}', + pos: 50, // cursor is between the quotes + expectedContext: [ + { + kind: ContextKind.LabelValue, + metricName: 'metric_name', + labelName: 'labelName', + matchers: [ + { + name: 'labelName', + type: Neq, + value: '', + }, + { + name: 'labelName', + type: EqlSingle, + value: 'labelValue', + }, + ], + }, + ], + }, { title: 'autocomplete the labelValue associated to a labelName', expr: '{labelName=""}', @@ -427,6 +469,12 @@ describe('analyzeCompletion test', () => { pos: 22, // cursor is after '!' expectedContext: [{ kind: ContextKind.MatchOp }], }, + { + title: 'autocomplete matchOp 3', + expr: 'metric_name{"labelName"!}', + pos: 24, // cursor is after '!' 
+ expectedContext: [{ kind: ContextKind.BinOp }], + }, { title: 'autocomplete duration with offset', expr: 'http_requests_total offset 5', diff --git a/web/ui/module/codemirror-promql/src/complete/hybrid.ts b/web/ui/module/codemirror-promql/src/complete/hybrid.ts index cf23aa11a..46748d5dc 100644 --- a/web/ui/module/codemirror-promql/src/complete/hybrid.ts +++ b/web/ui/module/codemirror-promql/src/complete/hybrid.ts @@ -29,7 +29,6 @@ import { GroupingLabels, Gte, Gtr, - LabelMatcher, LabelMatchers, LabelName, Lss, @@ -52,6 +51,9 @@ import { SubqueryExpr, Unless, VectorSelector, + UnquotedLabelMatcher, + QuotedLabelMatcher, + QuotedLabelName, } from '@prometheus-io/lezer-promql'; import { Completion, CompletionContext, CompletionResult } from '@codemirror/autocomplete'; import { EditorState } from '@codemirror/state'; @@ -181,7 +183,10 @@ export function computeStartCompletePosition(node: SyntaxNode, pos: number): num let start = node.from; if (node.type.id === LabelMatchers || node.type.id === GroupingLabels) { start = computeStartCompleteLabelPositionInLabelMatcherOrInGroupingLabel(node, pos); - } else if (node.type.id === FunctionCallBody || (node.type.id === StringLiteral && node.parent?.type.id === LabelMatcher)) { + } else if ( + node.type.id === FunctionCallBody || + (node.type.id === StringLiteral && (node.parent?.type.id === UnquotedLabelMatcher || node.parent?.type.id === QuotedLabelMatcher)) + ) { // When the cursor is between bracket, quote, we need to increment the starting position to avoid to consider the open bracket/ first string. start++; } else if ( @@ -212,7 +217,7 @@ export function analyzeCompletion(state: EditorState, node: SyntaxNode): Context result.push({ kind: ContextKind.Duration }); break; } - if (node.parent?.type.id === LabelMatcher) { + if (node.parent?.type.id === UnquotedLabelMatcher || node.parent?.type.id === QuotedLabelMatcher) { // In this case the current token is not itself a valid match op yet: // metric_name{labelName!} result.push({ kind: ContextKind.MatchOp }); @@ -380,7 +385,7 @@ export function analyzeCompletion(state: EditorState, node: SyntaxNode): Context // sum by (myL) // So we have to continue to autocomplete any kind of labelName result.push({ kind: ContextKind.LabelName }); - } else if (node.parent?.type.id === LabelMatcher) { + } else if (node.parent?.type.id === UnquotedLabelMatcher) { // In that case we are in the given situation: // metric_name{myL} or {myL} // so we have or to continue to autocomplete any kind of labelName or @@ -389,9 +394,9 @@ export function analyzeCompletion(state: EditorState, node: SyntaxNode): Context } break; case StringLiteral: - if (node.parent?.type.id === LabelMatcher) { + if (node.parent?.type.id === UnquotedLabelMatcher || node.parent?.type.id === QuotedLabelMatcher) { // In this case we are in the given situation: - // metric_name{labelName=""} + // metric_name{labelName=""} or metric_name{"labelName"=""} // So we can autocomplete the labelValue // Get the labelName. 
@@ -399,18 +404,34 @@ export function analyzeCompletion(state: EditorState, node: SyntaxNode): Context let labelName = ''; if (node.parent.firstChild?.type.id === LabelName) { labelName = state.sliceDoc(node.parent.firstChild.from, node.parent.firstChild.to); + } else if (node.parent.firstChild?.type.id === QuotedLabelName) { + labelName = state.sliceDoc(node.parent.firstChild.from, node.parent.firstChild.to).slice(1, -1); } // then find the metricName if it exists const metricName = getMetricNameInVectorSelector(node, state); // finally get the full matcher available const matcherNode = walkBackward(node, LabelMatchers); - const labelMatchers = buildLabelMatchers(matcherNode ? matcherNode.getChildren(LabelMatcher) : [], state); + const labelMatcherOpts = [QuotedLabelName, QuotedLabelMatcher, UnquotedLabelMatcher]; + let labelMatchers: Matcher[] = []; + for (const labelMatcherOpt of labelMatcherOpts) { + labelMatchers = labelMatchers.concat(buildLabelMatchers(matcherNode ? matcherNode.getChildren(labelMatcherOpt) : [], state)); + } result.push({ kind: ContextKind.LabelValue, metricName: metricName, labelName: labelName, matchers: labelMatchers, }); + } else if (node.parent?.parent?.type.id === GroupingLabels) { + // In this case we are in the given situation: + // sum by ("myL") + // So we have to continue to autocomplete any kind of labelName + result.push({ kind: ContextKind.LabelName }); + } else if (node.parent?.parent?.type.id === LabelMatchers) { + // In that case we are in the given situation: + // {""} or {"metric_"} + // since this is for the QuotedMetricName we need to continue to autocomplete for the metric names + result.push({ kind: ContextKind.MetricName, metricName: state.sliceDoc(node.from, node.to).slice(1, -1) }); } break; case NumberLiteral: diff --git a/web/ui/module/codemirror-promql/src/parser/matcher.ts b/web/ui/module/codemirror-promql/src/parser/matcher.ts index f432ffe28..99e2e3969 100644 --- a/web/ui/module/codemirror-promql/src/parser/matcher.ts +++ b/web/ui/module/codemirror-promql/src/parser/matcher.ts @@ -12,33 +12,75 @@ // limitations under the License. import { SyntaxNode } from '@lezer/common'; -import { EqlRegex, EqlSingle, LabelName, MatchOp, Neq, NeqRegex, StringLiteral } from '@prometheus-io/lezer-promql'; +import { + EqlRegex, + EqlSingle, + LabelName, + MatchOp, + Neq, + NeqRegex, + StringLiteral, + UnquotedLabelMatcher, + QuotedLabelMatcher, + QuotedLabelName, +} from '@prometheus-io/lezer-promql'; import { EditorState } from '@codemirror/state'; import { Matcher } from '../types'; function createMatcher(labelMatcher: SyntaxNode, state: EditorState): Matcher { const matcher = new Matcher(0, '', ''); const cursor = labelMatcher.cursor(); - if (!cursor.next()) { - // weird case, that would mean the labelMatcher doesn't have any child. - return matcher; - } - do { - switch (cursor.type.id) { - case LabelName: - matcher.name = state.sliceDoc(cursor.from, cursor.to); - break; - case MatchOp: - const ope = cursor.node.firstChild; - if (ope) { - matcher.type = ope.type.id; + switch (cursor.type.id) { + case QuotedLabelMatcher: + if (!cursor.next()) { + // weird case, that would mean the QuotedLabelMatcher doesn't have any child. 
+ return matcher; + } + do { + switch (cursor.type.id) { + case QuotedLabelName: + matcher.name = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); + break; + case MatchOp: + const ope = cursor.node.firstChild; + if (ope) { + matcher.type = ope.type.id; + } + break; + case StringLiteral: + matcher.value = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); + break; } - break; - case StringLiteral: - matcher.value = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); - break; - } - } while (cursor.nextSibling()); + } while (cursor.nextSibling()); + break; + case UnquotedLabelMatcher: + if (!cursor.next()) { + // weird case, that would mean the UnquotedLabelMatcher doesn't have any child. + return matcher; + } + do { + switch (cursor.type.id) { + case LabelName: + matcher.name = state.sliceDoc(cursor.from, cursor.to); + break; + case MatchOp: + const ope = cursor.node.firstChild; + if (ope) { + matcher.type = ope.type.id; + } + break; + case StringLiteral: + matcher.value = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); + break; + } + } while (cursor.nextSibling()); + break; + case QuotedLabelName: + matcher.name = '__name__'; + matcher.value = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); + matcher.type = EqlSingle; + break; + } return matcher; } diff --git a/web/ui/module/codemirror-promql/src/parser/parser.test.ts b/web/ui/module/codemirror-promql/src/parser/parser.test.ts index 54b95553c..2bc7e67ff 100644 --- a/web/ui/module/codemirror-promql/src/parser/parser.test.ts +++ b/web/ui/module/codemirror-promql/src/parser/parser.test.ts @@ -204,6 +204,11 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [] as Diagnostic[], }, + { + expr: 'foo and on(test,"blub") bar', + expectedValueType: ValueType.vector, + expectedDiag: [] as Diagnostic[], + }, { expr: 'foo and on() bar', expectedValueType: ValueType.vector, @@ -214,6 +219,11 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [] as Diagnostic[], }, + { + expr: 'foo and ignoring(test,"blub") bar', + expectedValueType: ValueType.vector, + expectedDiag: [] as Diagnostic[], + }, { expr: 'foo and ignoring() bar', expectedValueType: ValueType.vector, @@ -229,6 +239,11 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [] as Diagnostic[], }, + { + expr: 'foo / on(test,blub) group_left("bar") bar', + expectedValueType: ValueType.vector, + expectedDiag: [] as Diagnostic[], + }, { expr: 'foo / ignoring(test,blub) group_left(blub) bar', expectedValueType: ValueType.vector, @@ -825,6 +840,134 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [], }, + { + expr: '{"foo"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + // with metric name in the middle + expr: '{a="b","foo",c~="d"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"foo", a="bc"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"colon:in:the:middle"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"dot.in.the.middle"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"😀 in metric name"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + // quotes with escape + expr: '{"this is \"foo\" metric"}', // eslint-disable-line + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"foo","colon:in:the:middle"="val"}', + 
expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"foo","dot.in.the.middle"="val"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"foo","😀 in label name"="val"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + // quotes with escape + expr: '{"foo","this is \"bar\" label"="val"}', // eslint-disable-line + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: 'foo{"bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + to: 10, + }, + ], + }, + { + expr: '{"foo", __name__="bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + to: 23, + }, + ], + }, + { + expr: '{"foo", "__name__"="bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + to: 25, + }, + ], + }, + { + expr: '{"__name__"="foo", __name__="bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + to: 34, + }, + ], + }, + { + expr: '{"foo", "bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + to: 14, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + }, + ], + }, + { + expr: `{'foo\`metric':'bar'}`, // eslint-disable-line + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{`foo\"metric`=`bar`}', // eslint-disable-line + expectedValueType: ValueType.vector, + expectedDiag: [], + }, ]; testCases.forEach((value) => { const state = createEditorState(value.expr); diff --git a/web/ui/module/codemirror-promql/src/parser/parser.ts b/web/ui/module/codemirror-promql/src/parser/parser.ts index 58e56185c..fba7b7b6b 100644 --- a/web/ui/module/codemirror-promql/src/parser/parser.ts +++ b/web/ui/module/codemirror-promql/src/parser/parser.ts @@ -27,7 +27,6 @@ import { Gte, Gtr, Identifier, - LabelMatcher, LabelMatchers, Lss, Lte, @@ -36,11 +35,14 @@ import { Or, ParenExpr, Quantile, + QuotedLabelMatcher, + QuotedLabelName, StepInvariantExpr, SubqueryExpr, Topk, UnaryExpr, Unless, + UnquotedLabelMatcher, VectorSelector, } from '@prometheus-io/lezer-promql'; import { containsAtLeastOneChild } from './path-finder'; @@ -282,7 +284,11 @@ export class Parser { private checkVectorSelector(node: SyntaxNode): void { const matchList = node.getChild(LabelMatchers); - const labelMatchers = buildLabelMatchers(matchList ? matchList.getChildren(LabelMatcher) : [], this.state); + const labelMatcherOpts = [QuotedLabelName, QuotedLabelMatcher, UnquotedLabelMatcher]; + let labelMatchers: Matcher[] = []; + for (const labelMatcherOpt of labelMatcherOpts) { + labelMatchers = labelMatchers.concat(buildLabelMatchers(matchList ? 
matchList.getChildren(labelMatcherOpt) : [], this.state));
+    }
     let vectorSelectorName = '';
     // VectorSelector ( Identifier )
     // https://github.com/promlabs/lezer-promql/blob/71e2f9fa5ae6f5c5547d5738966cd2512e6b99a8/src/promql.grammar#L200
@@ -301,6 +307,14 @@ export class Parser {
       // adding the metric name as a Matcher to avoid a false positive for this kind of expression:
       // foo{bare=''}
       labelMatchers.push(new Matcher(EqlSingle, '__name__', vectorSelectorName));
+    } else {
+      // When the metric name is not set outside the braces, check whether it
+      // is set twice inside them, as in:
+      // {__name__="foo", "foo"} or {"foo", "bar"}
+      const labelMatchersMetricName = labelMatchers.filter((lm) => lm.name === '__name__');
+      if (labelMatchersMetricName.length > 1) {
+        this.addDiagnostic(node, `metric name must not be set twice: ${labelMatchersMetricName[0].value} or ${labelMatchersMetricName[1].value}`);
+      }
     }
 
     // A Vector selector must contain at least one non-empty matcher to prevent
diff --git a/web/ui/module/lezer-promql/src/promql.grammar b/web/ui/module/lezer-promql/src/promql.grammar
index 496648317..fd4edddf2 100644
--- a/web/ui/module/lezer-promql/src/promql.grammar
+++ b/web/ui/module/lezer-promql/src/promql.grammar
@@ -97,7 +97,7 @@ binModifiers {
 }
 
 GroupingLabels {
-  "(" (LabelName ("," LabelName)* ","?)? ")"
+  "(" ((LabelName | QuotedLabelName) ("," (LabelName | QuotedLabelName))* ","?)? ")"
 }
 
 FunctionCall {
@@ -220,7 +220,7 @@ VectorSelector {
 }
 
 LabelMatchers {
-  "{" (LabelMatcher ("," LabelMatcher)* ","?)? "}"
+  "{" ((UnquotedLabelMatcher | QuotedLabelMatcher | QuotedLabelName)("," (UnquotedLabelMatcher | QuotedLabelMatcher | QuotedLabelName))* ","?)? "}"
 }
 
 MatchOp {
@@ -230,8 +230,16 @@ MatchOp {
   NeqRegex
 }
 
-LabelMatcher {
-  LabelName MatchOp StringLiteral
+UnquotedLabelMatcher {
+  LabelName MatchOp StringLiteral
+}
+
+QuotedLabelMatcher {
+  QuotedLabelName MatchOp StringLiteral
+}
+
+QuotedLabelName {
+  StringLiteral
 }
 
 StepInvariantExpr {
diff --git a/web/ui/module/lezer-promql/test/expression.txt b/web/ui/module/lezer-promql/test/expression.txt
index 2e2b2f40b..daba7d800 100644
--- a/web/ui/module/lezer-promql/test/expression.txt
+++ b/web/ui/module/lezer-promql/test/expression.txt
@@ -112,6 +112,54 @@ PromQL(
   )
 )
 
+# Quoted label name in grouping labels
+
+sum by("job", mode) (test_metric) / on("job") group_left sum by("job")(test_metric)
+
+==>
+
+PromQL(
+  BinaryExpr(
+    AggregateExpr(
+      AggregateOp(Sum),
+      AggregateModifier(
+        By,
+        GroupingLabels(
+          QuotedLabelName(StringLiteral),
+          LabelName
+        )
+      ),
+      FunctionCallBody(
+        VectorSelector(
+          Identifier
+        )
+      )
+    ),
+    Div,
+    MatchingModifierClause(
+      On,
+      GroupingLabels(
+        QuotedLabelName(StringLiteral)
+      )
+      GroupLeft
+    ),
+    AggregateExpr(
+      AggregateOp(Sum),
+      AggregateModifier(
+        By,
+        GroupingLabels(
+          QuotedLabelName(StringLiteral)
+        )
+      ),
+      FunctionCallBody(
+        VectorSelector(
+          Identifier
+        )
+      )
+    )
+  )
+)
+
 # Case insensitivity for aggregations and binop modifiers. 
SuM BY(testlabel1) (testmetric1) / IGNOring(testlabel2) AVG withOUT(testlabel3) (testmetric2) @@ -226,25 +274,25 @@ PromQL( VectorSelector( Identifier, LabelMatchers( - LabelMatcher( - LabelName, - MatchOp(EqlSingle), - StringLiteral + UnquotedLabelMatcher( + LabelName, + MatchOp(EqlSingle), + StringLiteral ), - LabelMatcher( - LabelName, - MatchOp(Neq), - StringLiteral + UnquotedLabelMatcher( + LabelName, + MatchOp(Neq), + StringLiteral ), - LabelMatcher( - LabelName, - MatchOp(EqlRegex), - StringLiteral + UnquotedLabelMatcher( + LabelName, + MatchOp(EqlRegex), + StringLiteral ), - LabelMatcher( - LabelName, - MatchOp(NeqRegex), - StringLiteral + UnquotedLabelMatcher( + LabelName, + MatchOp(NeqRegex), + StringLiteral ) ) ) @@ -571,14 +619,14 @@ PromQL(NumberLiteral) NaN{foo="bar"} ==> -PromQL(BinaryExpr(NumberLiteral,⚠,VectorSelector(LabelMatchers(LabelMatcher(LabelName,MatchOp(EqlSingle),StringLiteral))))) +PromQL(BinaryExpr(NumberLiteral,⚠,VectorSelector(LabelMatchers(UnquotedLabelMatcher(LabelName,MatchOp(EqlSingle),StringLiteral))))) # Trying to illegally use Inf as a metric name. Inf{foo="bar"} ==> -PromQL(BinaryExpr(NumberLiteral,⚠,VectorSelector(LabelMatchers(LabelMatcher(LabelName,MatchOp(EqlSingle),StringLiteral))))) +PromQL(BinaryExpr(NumberLiteral,⚠,VectorSelector(LabelMatchers(UnquotedLabelMatcher(LabelName,MatchOp(EqlSingle),StringLiteral))))) # Negative offset @@ -614,3 +662,24 @@ MetricName(Identifier) ==> PromQL(BinaryExpr(NumberLiteral,Add,BinaryExpr(VectorSelector(Identifier),Atan2,VectorSelector(Identifier)))) + +# Testing quoted metric name + +{"metric_name"} + +==> +PromQL(VectorSelector(LabelMatchers(QuotedLabelName(StringLiteral)))) + +# Testing quoted label name + +{"foo"="bar"} + +==> +PromQL(VectorSelector(LabelMatchers(QuotedLabelMatcher(QuotedLabelName(StringLiteral), MatchOp(EqlSingle), StringLiteral)))) + +# Testing quoted metric name and label name + +{"metric_name", "foo"="bar"} + +==> +PromQL(VectorSelector(LabelMatchers(QuotedLabelName(StringLiteral), QuotedLabelMatcher(QuotedLabelName(StringLiteral), MatchOp(EqlSingle), StringLiteral)))) \ No newline at end of file From b974a9927923bf40dc580556b7d0fc3db86260a0 Mon Sep 17 00:00:00 2001 From: komisan19 <18901496+komisan19@users.noreply.github.com> Date: Tue, 30 Apr 2024 10:45:20 +0900 Subject: [PATCH 028/112] fix Signed-off-by: komisan19 <18901496+komisan19@users.noreply.github.com> --- .../otlptranslator/prometheusremotewrite/helper.go | 14 +++++--------- .../prometheusremotewrite/metrics_to_prw.go | 2 +- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index da4ca07d8..d9d80cdc7 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -415,36 +415,32 @@ func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp { case pmetric.MetricTypeGauge: dataPoints := metric.Gauge().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } case pmetric.MetricTypeSum: dataPoints := metric.Sum().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } case pmetric.MetricTypeHistogram: dataPoints := metric.Histogram().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = 
maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } case pmetric.MetricTypeExponentialHistogram: dataPoints := metric.ExponentialHistogram().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } case pmetric.MetricTypeSummary: dataPoints := metric.Summary().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } } return ts } -func maxTimestamp(a, b pcommon.Timestamp) pcommon.Timestamp { - return max(a, b) -} - // addSingleSummaryDataPoint converts pt to len(QuantileValues) + 2 samples. func addSingleSummaryDataPoint(pt pmetric.SummaryDataPoint, resource pcommon.Resource, metric pmetric.Metric, settings Settings, tsMap map[string]*prompb.TimeSeries, baseName string) { diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index fb141034a..7d51b9ee2 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -45,7 +45,7 @@ func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*promp // TODO: decide if instrumentation library information should be exported as labels for k := 0; k < metricSlice.Len(); k++ { metric := metricSlice.At(k) - mostRecentTimestamp = maxTimestamp(mostRecentTimestamp, mostRecentTimestampInMetric(metric)) + mostRecentTimestamp = max(mostRecentTimestamp, mostRecentTimestampInMetric(metric)) if !isValidAggregationTemporality(metric) { errs = multierr.Append(errs, fmt.Errorf("invalid temporality and type combination for metric %q", metric.Name())) From 9fda9443d492aaaa5de31e6e1e9bba085be2fc30 Mon Sep 17 00:00:00 2001 From: guangwu Date: Tue, 30 Apr 2024 16:47:10 +0800 Subject: [PATCH 029/112] fix(promql/query_logger): close file in error handling (#13948) Signed-off-by: guoguangwu --- promql/query_logger.go | 2 ++ promql/query_logger_test.go | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/promql/query_logger.go b/promql/query_logger.go index fa4e1fb07..7ddd8c2d5 100644 --- a/promql/query_logger.go +++ b/promql/query_logger.go @@ -96,12 +96,14 @@ func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io err = file.Truncate(int64(filesize)) if err != nil { + file.Close() level.Error(logger).Log("msg", "Error setting filesize.", "filesize", filesize, "err", err) return nil, nil, err } fileAsBytes, err := mmap.Map(file, mmap.RDWR, 0) if err != nil { + file.Close() level.Error(logger).Log("msg", "Failed to mmap", "file", filename, "Attempted size", filesize, "err", err) return nil, nil, err } diff --git a/promql/query_logger_test.go b/promql/query_logger_test.go index 4135753fd..376d61b64 100644 --- a/promql/query_logger_test.go +++ b/promql/query_logger_test.go @@ -110,10 +110,7 @@ func TestMMapFile(t *testing.T) { filename := file.Name() defer os.Remove(filename) - fileAsBytes, closer, err := getMMapedFile(filename, 2, nil) - if err != nil { - t.Cleanup(func() { closer.Close() }) - } + fileAsBytes, _, err := getMMapedFile(filename, 2, nil) require.NoError(t, err) copy(fileAsBytes, "ab") From 7554384dac12ed8cb637161843eca0534662d730 Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Tue, 30 Apr 2024 11:29:52 +0200 Subject: [PATCH 030/112] otlp: Prometheus to own its own copy of 
the otlptranslator package (#13991)

After a lot of productive discussion between the Prometheus and OpenTelemetry
communities, we decided that it made sense for Prometheus to own its own copy
of the code in charge of handling OTLP ingestion traffic.

This commit removes the README and update-copy.sh files that documented the
previous steps for updating the code. It also updates the licensing of all the
files to make the OpenTelemetry provenance explicit and to state the new
ownership.

Signed-off-by: Jesus Vazquez
Co-authored-by: Arve Knudsen
---
 storage/remote/otlptranslator/README.md       |  22 -
 .../prometheus/normalize_label.go             |  21 +-
 .../prometheus/normalize_name.go              |  21 +-
 .../otlptranslator/prometheus/unit_to_ucum.go |  21 +-
 .../prometheusremotewrite/helper.go           | 536 +++++++++---------
 .../prometheusremotewrite/histograms.go       |  77 +--
 .../prometheusremotewrite/metrics_to_prw.go   | 168 ++++--
 .../number_data_points.go                     | 178 +++---
 .../otlp_to_openmetrics_metadata.go           |  23 +-
 storage/remote/otlptranslator/update-copy.sh  |  27 -
 10 files changed, 596 insertions(+), 498 deletions(-)
 delete mode 100644 storage/remote/otlptranslator/README.md
 delete mode 100755 storage/remote/otlptranslator/update-copy.sh

diff --git a/storage/remote/otlptranslator/README.md b/storage/remote/otlptranslator/README.md
deleted file mode 100644
index 774fac5a7..000000000
--- a/storage/remote/otlptranslator/README.md
+++ /dev/null
@@ -1,22 +0,0 @@
-## Copying from opentelemetry/opentelemetry-collector-contrib
-
-This files in the `prometheus/` and `prometheusremotewrite/` are copied from the OpenTelemetry Project[^1].
-
-This is done instead of adding a go.mod dependency because OpenTelemetry depends on `prometheus/prometheus` and a cyclic dependency will be created. This is just a temporary solution and the long-term solution is to move the required packages from OpenTelemetry into `prometheus/prometheus`.
-
-To update the dependency is a multi-step process:
-1. Vendor the latest `prometheus/prometheus`@`main` into [`opentelemetry/opentelemetry-collector-contrib`](https://github.com/open-telemetry/opentelemetry-collector-contrib)
-1. Update the VERSION in `update-copy.sh`.
-1. Run `./update-copy.sh`.
-
-### Why copy?
-
-This is because the packages we copy depend on the [`prompb`](https://github.com/prometheus/prometheus/blob/main/prompb) package. While the package is relatively stable, there are still changes. For example, https://github.com/prometheus/prometheus/pull/11935 changed the types.
-This means if we depend on the upstream packages directly, we will never able to make the changes like above. Hence we're copying the code for now.
-
-### I need to manually change these files
-
-When we do want to make changes to the types in `prompb`, we might need to edit the files directly. That is OK, please let @gouthamve or @jesusvazquez know so they can take care of updating the upstream code (by vendoring in `prometheus/prometheus` upstream and resolving conflicts) and then will run the copy
-script again to keep things updated.
- -[^1]: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/translator/prometheus and https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/translator/prometheusremotewrite diff --git a/storage/remote/otlptranslator/prometheus/normalize_label.go b/storage/remote/otlptranslator/prometheus/normalize_label.go index a6b41d1c3..4f9942bd1 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label.go @@ -1,9 +1,20 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheus/normalize_label.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheus // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" +package prometheus import ( "strings" diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index a976dfb48..6cb4fc199 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -1,9 +1,20 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheus/normalize_name.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheus // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" +package prometheus import ( "strings" diff --git a/storage/remote/otlptranslator/prometheus/unit_to_ucum.go b/storage/remote/otlptranslator/prometheus/unit_to_ucum.go index 718a52067..1f8bf1a63 100644 --- a/storage/remote/otlptranslator/prometheus/unit_to_ucum.go +++ b/storage/remote/otlptranslator/prometheus/unit_to_ucum.go @@ -1,9 +1,20 @@ -// DO NOT EDIT. 
COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheus/unit_to_ucum.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheus // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" +package prometheus import "strings" diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index 817cbaba7..eea12b512 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -1,29 +1,42 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/helper.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. 
-// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( "encoding/hex" "fmt" "log" "math" + "slices" "sort" "strconv" - "strings" "time" "unicode/utf8" + "github.com/cespare/xxhash/v2" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/timestamp" - "github.com/prometheus/prometheus/model/value" - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" conventions "go.opentelemetry.io/collector/semconv/v1.6.1" + "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/prompb" + prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) @@ -48,7 +61,7 @@ const ( ) type bucketBoundsData struct { - sig string + ts *prompb.TimeSeries bound float64 } @@ -66,94 +79,47 @@ func (a ByLabelName) Len() int { return len(a) } func (a ByLabelName) Less(i, j int) bool { return a[i].Name < a[j].Name } func (a ByLabelName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -// addSample finds a TimeSeries in tsMap that corresponds to the label set labels, and add sample to the TimeSeries; it -// creates a new TimeSeries in the map if not found and returns the time series signature. -// tsMap will be unmodified if either labels or sample is nil, but can still be modified if the exemplar is nil. -func addSample(tsMap map[string]*prompb.TimeSeries, sample *prompb.Sample, labels []prompb.Label, - datatype string) string { - if sample == nil || labels == nil || tsMap == nil { - // This shouldn't happen - return "" - } - - sig := timeSeriesSignature(datatype, labels) - ts := tsMap[sig] - if ts != nil { - ts.Samples = append(ts.Samples, *sample) - } else { - newTs := &prompb.TimeSeries{ - Labels: labels, - Samples: []prompb.Sample{*sample}, - } - tsMap[sig] = newTs - } - - return sig -} - -// addExemplars finds a bucket bound that corresponds to the exemplars value and add the exemplar to the specific sig; -// we only add exemplars if samples are presents -// tsMap is unmodified if either of its parameters is nil and samples are nil. -func addExemplars(tsMap map[string]*prompb.TimeSeries, exemplars []prompb.Exemplar, bucketBoundsData []bucketBoundsData) { - if len(tsMap) == 0 || len(bucketBoundsData) == 0 || len(exemplars) == 0 { - return - } - - sort.Sort(byBucketBoundsData(bucketBoundsData)) - - for _, exemplar := range exemplars { - addExemplar(tsMap, bucketBoundsData, exemplar) - } -} - -func addExemplar(tsMap map[string]*prompb.TimeSeries, bucketBounds []bucketBoundsData, exemplar prompb.Exemplar) { - for _, bucketBound := range bucketBounds { - sig := bucketBound.sig - bound := bucketBound.bound - - ts := tsMap[sig] - if ts != nil && len(ts.Samples) > 0 && exemplar.Value <= bound { - ts.Exemplars = append(ts.Exemplars, exemplar) - return - } - } -} - -// timeSeries return a string signature in the form of: -// -// TYPE-label1-value1- ... -labelN-valueN -// -// the label slice should not contain duplicate label names; this method sorts the slice by label name before creating +// timeSeriesSignature returns a hashed label set signature. +// The label slice should not contain duplicate label names; this method sorts the slice by label name before creating // the signature. 
-func timeSeriesSignature(datatype string, labels []prompb.Label) string { - length := len(datatype) - - for _, lb := range labels { - length += 2 + len(lb.GetName()) + len(lb.GetValue()) - } - - b := strings.Builder{} - b.Grow(length) - b.WriteString(datatype) - +// The algorithm is the same as in Prometheus' labels.StableHash function. +func timeSeriesSignature(labels []prompb.Label) uint64 { sort.Sort(ByLabelName(labels)) - for _, lb := range labels { - b.WriteString("-") - b.WriteString(lb.GetName()) - b.WriteString("-") - b.WriteString(lb.GetValue()) - } + // Use xxhash.Sum64(b) for fast path as it's faster. + b := make([]byte, 0, 1024) + for i, v := range labels { + if len(b)+len(v.Name)+len(v.Value)+2 >= cap(b) { + // If labels entry is 1KB+ do not allocate whole entry. + h := xxhash.New() + _, _ = h.Write(b) + for _, v := range labels[i:] { + _, _ = h.WriteString(v.Name) + _, _ = h.Write(seps) + _, _ = h.WriteString(v.Value) + _, _ = h.Write(seps) + } + return h.Sum64() + } - return b.String() + b = append(b, v.Name...) + b = append(b, seps[0]) + b = append(b, v.Value...) + b = append(b, seps[0]) + } + return xxhash.Sum64(b) } +var seps = []byte{'\xff'} + // createAttributes creates a slice of Prometheus Labels with OTLP attributes and pairs of string values. -// Unpaired string values are ignored. String pairs overwrite OTLP labels if collisions happen, and overwrites are -// logged. Resulting label names are sanitized. -func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externalLabels map[string]string, extras ...string) []prompb.Label { - serviceName, haveServiceName := resource.Attributes().Get(conventions.AttributeServiceName) - instance, haveInstanceID := resource.Attributes().Get(conventions.AttributeServiceInstanceID) +// Unpaired string values are ignored. String pairs overwrite OTLP labels if collisions happen and +// if logOnOverwrite is true, the overwrite is logged. Resulting label names are sanitized. +func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externalLabels map[string]string, + ignoreAttrs []string, logOnOverwrite bool, extras ...string) []prompb.Label { + resourceAttrs := resource.Attributes() + serviceName, haveServiceName := resourceAttrs.Get(conventions.AttributeServiceName) + instance, haveInstanceID := resourceAttrs.Get(conventions.AttributeServiceInstanceID) // Calculate the maximum possible number of labels we could return so we can preallocate l maxLabelCount := attributes.Len() + len(externalLabels) + len(extras)/2 @@ -171,9 +137,13 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa // Ensure attributes are sorted by key for consistent merging of keys which // collide when sanitized. - labels := make([]prompb.Label, 0, attributes.Len()) + labels := make([]prompb.Label, 0, maxLabelCount) + // XXX: Should we always drop service namespace/service name/service instance ID from the labels + // (as they get mapped to other Prometheus labels)? 
attributes.Range(func(key string, value pcommon.Value) bool { - labels = append(labels, prompb.Label{Name: key, Value: value.AsString()}) + if !slices.Contains(ignoreAttrs, key) { + labels = append(labels, prompb.Label{Name: key, Value: value.AsString()}) + } return true }) sort.Stable(ByLabelName(labels)) @@ -190,7 +160,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa // Map service.name + service.namespace to job if haveServiceName { val := serviceName.AsString() - if serviceNamespace, ok := resource.Attributes().Get(conventions.AttributeServiceNamespace); ok { + if serviceNamespace, ok := resourceAttrs.Get(conventions.AttributeServiceNamespace); ok { val = fmt.Sprintf("%s/%s", serviceNamespace.AsString(), val) } l[model.JobLabel] = val @@ -213,7 +183,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa break } _, found := l[extras[i]] - if found { + if found && logOnOverwrite { log.Println("label " + extras[i] + " is overwritten. Check if Prometheus reserved labels are used.") } // internal labels should be maintained @@ -224,12 +194,12 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa l[name] = extras[i+1] } - s := make([]prompb.Label, 0, len(l)) + labels = labels[:0] for k, v := range l { - s = append(s, prompb.Label{Name: k, Value: v}) + labels = append(labels, prompb.Label{Name: k, Value: v}) } - return s + return labels } // isValidAggregationTemporality checks whether an OTel metric has a valid @@ -249,100 +219,84 @@ func isValidAggregationTemporality(metric pmetric.Metric) bool { return false } -// addSingleHistogramDataPoint converts pt to 2 + min(len(ExplicitBounds), len(BucketCount)) + 1 samples. It -// ignore extra buckets if len(ExplicitBounds) > len(BucketCounts) -func addSingleHistogramDataPoint(pt pmetric.HistogramDataPoint, resource pcommon.Resource, metric pmetric.Metric, settings Settings, tsMap map[string]*prompb.TimeSeries, baseName string) { - timestamp := convertTimeStamp(pt.Timestamp()) - baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels) +func (c *prometheusConverter) addHistogramDataPoints(dataPoints pmetric.HistogramDataPointSlice, + resource pcommon.Resource, settings Settings, baseName string) { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + timestamp := convertTimeStamp(pt.Timestamp()) + baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nil, false) - createLabels := func(nameSuffix string, extras ...string) []prompb.Label { - extraLabelCount := len(extras) / 2 - labels := make([]prompb.Label, len(baseLabels), len(baseLabels)+extraLabelCount+1) // +1 for name - copy(labels, baseLabels) + // If the sum is unset, it indicates the _sum metric point should be + // omitted + if pt.HasSum() { + // treat sum as a sample in an individual TimeSeries + sum := &prompb.Sample{ + Value: pt.Sum(), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + sum.Value = math.Float64frombits(value.StaleNaN) + } + + sumlabels := createLabels(baseName+sumStr, baseLabels) + c.addSample(sum, sumlabels) - for extrasIdx := 0; extrasIdx < extraLabelCount; extrasIdx++ { - labels = append(labels, prompb.Label{Name: extras[extrasIdx], Value: extras[extrasIdx+1]}) } - // sum, count, and buckets of the histogram should append suffix to baseName - labels = append(labels, prompb.Label{Name: model.MetricNameLabel, Value: baseName + nameSuffix}) - - return labels - } - - // If the sum is unset, it 
indicates the _sum metric point should be - // omitted - if pt.HasSum() { - // treat sum as a sample in an individual TimeSeries - sum := &prompb.Sample{ - Value: pt.Sum(), + // treat count as a sample in an individual TimeSeries + count := &prompb.Sample{ + Value: float64(pt.Count()), Timestamp: timestamp, } if pt.Flags().NoRecordedValue() { - sum.Value = math.Float64frombits(value.StaleNaN) + count.Value = math.Float64frombits(value.StaleNaN) } - sumlabels := createLabels(sumStr) - addSample(tsMap, sum, sumlabels, metric.Type().String()) + countlabels := createLabels(baseName+countStr, baseLabels) + c.addSample(count, countlabels) - } + // cumulative count for conversion to cumulative histogram + var cumulativeCount uint64 - // treat count as a sample in an individual TimeSeries - count := &prompb.Sample{ - Value: float64(pt.Count()), - Timestamp: timestamp, - } - if pt.Flags().NoRecordedValue() { - count.Value = math.Float64frombits(value.StaleNaN) - } + var bucketBounds []bucketBoundsData - countlabels := createLabels(countStr) - addSample(tsMap, count, countlabels, metric.Type().String()) + // process each bound, based on histograms proto definition, # of buckets = # of explicit bounds + 1 + for i := 0; i < pt.ExplicitBounds().Len() && i < pt.BucketCounts().Len(); i++ { + bound := pt.ExplicitBounds().At(i) + cumulativeCount += pt.BucketCounts().At(i) + bucket := &prompb.Sample{ + Value: float64(cumulativeCount), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + bucket.Value = math.Float64frombits(value.StaleNaN) + } + boundStr := strconv.FormatFloat(bound, 'f', -1, 64) + labels := createLabels(baseName+bucketStr, baseLabels, leStr, boundStr) + ts := c.addSample(bucket, labels) - // cumulative count for conversion to cumulative histogram - var cumulativeCount uint64 - - promExemplars := getPromExemplars[pmetric.HistogramDataPoint](pt) - - var bucketBounds []bucketBoundsData - - // process each bound, based on histograms proto definition, # of buckets = # of explicit bounds + 1 - for i := 0; i < pt.ExplicitBounds().Len() && i < pt.BucketCounts().Len(); i++ { - bound := pt.ExplicitBounds().At(i) - cumulativeCount += pt.BucketCounts().At(i) - bucket := &prompb.Sample{ - Value: float64(cumulativeCount), + bucketBounds = append(bucketBounds, bucketBoundsData{ts: ts, bound: bound}) + } + // add le=+Inf bucket + infBucket := &prompb.Sample{ Timestamp: timestamp, } if pt.Flags().NoRecordedValue() { - bucket.Value = math.Float64frombits(value.StaleNaN) + infBucket.Value = math.Float64frombits(value.StaleNaN) + } else { + infBucket.Value = float64(pt.Count()) } - boundStr := strconv.FormatFloat(bound, 'f', -1, 64) - labels := createLabels(bucketStr, leStr, boundStr) - sig := addSample(tsMap, bucket, labels, metric.Type().String()) + infLabels := createLabels(baseName+bucketStr, baseLabels, leStr, pInfStr) + ts := c.addSample(infBucket, infLabels) - bucketBounds = append(bucketBounds, bucketBoundsData{sig: sig, bound: bound}) - } - // add le=+Inf bucket - infBucket := &prompb.Sample{ - Timestamp: timestamp, - } - if pt.Flags().NoRecordedValue() { - infBucket.Value = math.Float64frombits(value.StaleNaN) - } else { - infBucket.Value = float64(pt.Count()) - } - infLabels := createLabels(bucketStr, leStr, pInfStr) - sig := addSample(tsMap, infBucket, infLabels, metric.Type().String()) + bucketBounds = append(bucketBounds, bucketBoundsData{ts: ts, bound: math.Inf(1)}) + c.addExemplars(pt, bucketBounds) - bucketBounds = append(bucketBounds, bucketBoundsData{sig: sig, bound: 
math.Inf(1)}) - addExemplars(tsMap, promExemplars, bucketBounds) - - // add _created time series if needed - startTimestamp := pt.StartTimestamp() - if settings.ExportCreatedMetric && startTimestamp != 0 { - labels := createLabels(createdSuffix) - addCreatedTimeSeriesIfNeeded(tsMap, labels, startTimestamp, pt.Timestamp(), metric.Type().String()) + startTimestamp := pt.StartTimestamp() + if settings.ExportCreatedMetric && startTimestamp != 0 { + labels := createLabels(baseName+createdSuffix, baseLabels) + c.addTimeSeriesIfNeeded(labels, startTimestamp, pt.Timestamp()) + } } } @@ -448,129 +402,177 @@ func maxTimestamp(a, b pcommon.Timestamp) pcommon.Timestamp { return b } -// addSingleSummaryDataPoint converts pt to len(QuantileValues) + 2 samples. -func addSingleSummaryDataPoint(pt pmetric.SummaryDataPoint, resource pcommon.Resource, metric pmetric.Metric, settings Settings, - tsMap map[string]*prompb.TimeSeries, baseName string) { - timestamp := convertTimeStamp(pt.Timestamp()) - baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels) +func (c *prometheusConverter) addSummaryDataPoints(dataPoints pmetric.SummaryDataPointSlice, resource pcommon.Resource, + settings Settings, baseName string) { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + timestamp := convertTimeStamp(pt.Timestamp()) + baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nil, false) - createLabels := func(name string, extras ...string) []prompb.Label { - extraLabelCount := len(extras) / 2 - labels := make([]prompb.Label, len(baseLabels), len(baseLabels)+extraLabelCount+1) // +1 for name - copy(labels, baseLabels) - - for extrasIdx := 0; extrasIdx < extraLabelCount; extrasIdx++ { - labels = append(labels, prompb.Label{Name: extras[extrasIdx], Value: extras[extrasIdx+1]}) - } - - labels = append(labels, prompb.Label{Name: model.MetricNameLabel, Value: name}) - - return labels - } - - // treat sum as a sample in an individual TimeSeries - sum := &prompb.Sample{ - Value: pt.Sum(), - Timestamp: timestamp, - } - if pt.Flags().NoRecordedValue() { - sum.Value = math.Float64frombits(value.StaleNaN) - } - // sum and count of the summary should append suffix to baseName - sumlabels := createLabels(baseName + sumStr) - addSample(tsMap, sum, sumlabels, metric.Type().String()) - - // treat count as a sample in an individual TimeSeries - count := &prompb.Sample{ - Value: float64(pt.Count()), - Timestamp: timestamp, - } - if pt.Flags().NoRecordedValue() { - count.Value = math.Float64frombits(value.StaleNaN) - } - countlabels := createLabels(baseName + countStr) - addSample(tsMap, count, countlabels, metric.Type().String()) - - // process each percentile/quantile - for i := 0; i < pt.QuantileValues().Len(); i++ { - qt := pt.QuantileValues().At(i) - quantile := &prompb.Sample{ - Value: qt.Value(), + // treat sum as a sample in an individual TimeSeries + sum := &prompb.Sample{ + Value: pt.Sum(), Timestamp: timestamp, } if pt.Flags().NoRecordedValue() { - quantile.Value = math.Float64frombits(value.StaleNaN) + sum.Value = math.Float64frombits(value.StaleNaN) } - percentileStr := strconv.FormatFloat(qt.Quantile(), 'f', -1, 64) - qtlabels := createLabels(baseName, quantileStr, percentileStr) - addSample(tsMap, quantile, qtlabels, metric.Type().String()) - } + // sum and count of the summary should append suffix to baseName + sumlabels := createLabels(baseName+sumStr, baseLabels) + c.addSample(sum, sumlabels) - // add _created time series if needed - startTimestamp := 
pt.StartTimestamp() - if settings.ExportCreatedMetric && startTimestamp != 0 { - createdLabels := createLabels(baseName + createdSuffix) - addCreatedTimeSeriesIfNeeded(tsMap, createdLabels, startTimestamp, pt.Timestamp(), metric.Type().String()) + // treat count as a sample in an individual TimeSeries + count := &prompb.Sample{ + Value: float64(pt.Count()), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + count.Value = math.Float64frombits(value.StaleNaN) + } + countlabels := createLabels(baseName+countStr, baseLabels) + c.addSample(count, countlabels) + + // process each percentile/quantile + for i := 0; i < pt.QuantileValues().Len(); i++ { + qt := pt.QuantileValues().At(i) + quantile := &prompb.Sample{ + Value: qt.Value(), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + quantile.Value = math.Float64frombits(value.StaleNaN) + } + percentileStr := strconv.FormatFloat(qt.Quantile(), 'f', -1, 64) + qtlabels := createLabels(baseName, baseLabels, quantileStr, percentileStr) + c.addSample(quantile, qtlabels) + } + + startTimestamp := pt.StartTimestamp() + if settings.ExportCreatedMetric && startTimestamp != 0 { + createdLabels := createLabels(baseName+createdSuffix, baseLabels) + c.addTimeSeriesIfNeeded(createdLabels, startTimestamp, pt.Timestamp()) + } } } -// addCreatedTimeSeriesIfNeeded adds {name}_created time series with a single -// sample. If the series exists, then new samples won't be added. -func addCreatedTimeSeriesIfNeeded( - series map[string]*prompb.TimeSeries, - labels []prompb.Label, - startTimestamp pcommon.Timestamp, - timestamp pcommon.Timestamp, - metricType string, -) { - sig := timeSeriesSignature(metricType, labels) - if _, ok := series[sig]; !ok { - series[sig] = &prompb.TimeSeries{ - Labels: labels, - Samples: []prompb.Sample{ - { // convert ns to ms - Value: float64(convertTimeStamp(startTimestamp)), - Timestamp: convertTimeStamp(timestamp), - }, +// createLabels returns a copy of baseLabels, adding to it the pair model.MetricNameLabel=name. +// If extras are provided, corresponding label pairs are also added to the returned slice. +// If extras is uneven length, the last (unpaired) extra will be ignored. +func createLabels(name string, baseLabels []prompb.Label, extras ...string) []prompb.Label { + extraLabelCount := len(extras) / 2 + labels := make([]prompb.Label, len(baseLabels), len(baseLabels)+extraLabelCount+1) // +1 for name + copy(labels, baseLabels) + + n := len(extras) + n -= n % 2 + for extrasIdx := 0; extrasIdx < n; extrasIdx += 2 { + labels = append(labels, prompb.Label{Name: extras[extrasIdx], Value: extras[extrasIdx+1]}) + } + + labels = append(labels, prompb.Label{Name: model.MetricNameLabel, Value: name}) + return labels +} + +// getOrCreateTimeSeries returns the time series corresponding to the label set if existent, and false. +// Otherwise it creates a new one and returns that, and true. 
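As a quick aside on the `createLabels` helper consolidated above, here is a hedged usage sketch (it assumes same-package access, since the helper is unexported, and the metric name and label values are invented for illustration):

```go
// Hypothetical base labels, as produced by createAttributes for a data point.
base := []prompb.Label{{Name: "job", Value: "shop/cart"}}

// createLabels copies base, appends the extras pairwise, then __name__ last,
// without mutating base (per the definition in the hunk above).
lbls := createLabels("http_request_duration_seconds_bucket", base, "le", "0.5")
// -> job="shop/cart", le="0.5", __name__="http_request_duration_seconds_bucket"
```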
+func (c *prometheusConverter) getOrCreateTimeSeries(lbls []prompb.Label) (*prompb.TimeSeries, bool) { + h := timeSeriesSignature(lbls) + ts := c.unique[h] + if ts != nil { + if isSameMetric(ts, lbls) { + // We already have this metric + return ts, false + } + + // Look for a matching conflict + for _, cTS := range c.conflicts[h] { + if isSameMetric(cTS, lbls) { + // We already have this metric + return cTS, false + } + } + + // New conflict + ts = &prompb.TimeSeries{ + Labels: lbls, + } + c.conflicts[h] = append(c.conflicts[h], ts) + return ts, true + } + + // This metric is new + ts = &prompb.TimeSeries{ + Labels: lbls, + } + c.unique[h] = ts + return ts, true +} + +// addTimeSeriesIfNeeded adds a corresponding time series if it doesn't already exist. +// If the time series doesn't already exist, it gets added with startTimestamp for its value and timestamp for its timestamp, +// both converted to milliseconds. +func (c *prometheusConverter) addTimeSeriesIfNeeded(lbls []prompb.Label, startTimestamp pcommon.Timestamp, timestamp pcommon.Timestamp) { + ts, created := c.getOrCreateTimeSeries(lbls) + if created { + ts.Samples = []prompb.Sample{ + { + // convert ns to ms + Value: float64(convertTimeStamp(startTimestamp)), + Timestamp: convertTimeStamp(timestamp), }, } } } -// addResourceTargetInfo converts the resource to the target info metric -func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timestamp pcommon.Timestamp, tsMap map[string]*prompb.TimeSeries) { - if settings.DisableTargetInfo { +// addResourceTargetInfo converts the resource to the target info metric. +func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timestamp pcommon.Timestamp, converter *prometheusConverter) { + if settings.DisableTargetInfo || timestamp == 0 { return } - // Use resource attributes (other than those used for job+instance) as the - // metric labels for the target info metric - attributes := pcommon.NewMap() - resource.Attributes().CopyTo(attributes) - attributes.RemoveIf(func(k string, _ pcommon.Value) bool { - switch k { - case conventions.AttributeServiceName, conventions.AttributeServiceNamespace, conventions.AttributeServiceInstanceID: - // Remove resource attributes used for job + instance - return true - default: - return false + + attributes := resource.Attributes() + identifyingAttrs := []string{ + conventions.AttributeServiceNamespace, + conventions.AttributeServiceName, + conventions.AttributeServiceInstanceID, + } + nonIdentifyingAttrsCount := attributes.Len() + for _, a := range identifyingAttrs { + _, haveAttr := attributes.Get(a) + if haveAttr { + nonIdentifyingAttrsCount-- } - }) - if attributes.Len() == 0 { + } + if nonIdentifyingAttrsCount == 0 { // If we only have job + instance, then target_info isn't useful, so don't add it. return } - // create parameters for addSample + name := targetMetricName if len(settings.Namespace) > 0 { name = settings.Namespace + "_" + name } - labels := createAttributes(resource, attributes, settings.ExternalLabels, model.MetricNameLabel, name) + + labels := createAttributes(resource, attributes, settings.ExternalLabels, identifyingAttrs, false, model.MetricNameLabel, name) + haveIdentifier := false + for _, l := range labels { + if l.Name == model.JobLabel || l.Name == model.InstanceLabel { + haveIdentifier = true + break + } + } + + if !haveIdentifier { + // We need at least one identifying label to generate target_info. 
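To illustrate the identifying-attribute handling above, consider this hedged example (attribute values invented; the `instance` mapping from `service.instance.id` happens in a part of `createAttributes` not shown in this hunk):

```go
// Hypothetical resource attributes on an incoming OTLP payload:
//   service.namespace   = "shop"
//   service.name        = "cart"
//   service.instance.id = "pod-3"
//   k8s.cluster.name    = "eu-1"
//
// The first three are consumed as job/instance identifiers, so only
// k8s.cluster.name counts as non-identifying and target_info is emitted.
// Assuming label names are normalized (dots to underscores), the result is:
//   target_info{job="shop/cart", instance="pod-3", k8s_cluster_name="eu-1"} 1
```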
+ return + } + sample := &prompb.Sample{ Value: float64(1), // convert ns to ms Timestamp: convertTimeStamp(timestamp), } - addSample(tsMap, sample, labels, infoType) + converter.addSample(sample, labels) } // convertTimeStamp converts OTLP timestamp in ns to timestamp in ms diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go index 14cea32c3..45f1df123 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go @@ -1,58 +1,59 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/histograms.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( "fmt" "math" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/value" - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/prompb" ) const defaultZeroThreshold = 1e-128 -func addSingleExponentialHistogramDataPoint( - metric string, - pt pmetric.ExponentialHistogramDataPoint, - resource pcommon.Resource, - settings Settings, - series map[string]*prompb.TimeSeries, -) error { - labels := createAttributes( - resource, - pt.Attributes(), - settings.ExternalLabels, - model.MetricNameLabel, - metric, - ) +func (c *prometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice, + resource pcommon.Resource, settings Settings, baseName string) error { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + lbls := createAttributes( + resource, + pt.Attributes(), + settings.ExternalLabels, + nil, + true, + model.MetricNameLabel, + baseName, + ) + ts, _ := c.getOrCreateTimeSeries(lbls) - sig := timeSeriesSignature( - pmetric.MetricTypeExponentialHistogram.String(), - labels, - ) - ts, ok := series[sig] - if !ok { - ts = &prompb.TimeSeries{ - Labels: labels, + histogram, err := exponentialToNativeHistogram(pt) + if err != nil { + return err } - series[sig] = ts - } + ts.Histograms = append(ts.Histograms, histogram) - histogram, err := exponentialToNativeHistogram(pt) - if err != nil { - return err + exemplars := getPromExemplars[pmetric.ExponentialHistogramDataPoint](pt) + ts.Exemplars = append(ts.Exemplars, exemplars...) 
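The conversion done by `exponentialToNativeHistogram` (not shown in this hunk) is possible because OTel exponential histograms and Prometheus native histograms share a bucketing scheme: at scale `s`, bucket `i` covers `(base^i, base^(i+1)]` with `base = 2^(2^-s)`; the `defaultZeroThreshold` constant above presumably supplies a zero-bucket width when the OTel point leaves it unset. A self-contained sketch of the boundary arithmetic, for intuition only:

```go
package main

import (
	"fmt"
	"math"
)

// upperBound returns the upper boundary of exponential-histogram bucket
// `index` at the given scale: base = 2^(2^-scale), bound = base^(index+1).
func upperBound(index, scale int32) float64 {
	base := math.Pow(2, math.Pow(2, -float64(scale)))
	return math.Pow(base, float64(index+1))
}

func main() {
	fmt.Println(upperBound(0, 0)) // 2: at scale 0, bucket 0 covers (1, 2]
	fmt.Println(upperBound(1, 1)) // 2: at scale 1, bucket 1 covers (sqrt(2), 2]
}
```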
} - ts.Histograms = append(ts.Histograms, histogram) - - exemplars := getPromExemplars[pmetric.ExponentialHistogramDataPoint](pt) - ts.Exemplars = append(ts.Exemplars, exemplars...) return nil } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index fb141034a..16bdc098b 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -1,19 +1,32 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/metrics_to_prw.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( "errors" "fmt" + "sort" + "strconv" - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" "go.uber.org/multierr" + "github.com/prometheus/prometheus/prompb" prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) @@ -27,9 +40,33 @@ type Settings struct { } // FromMetrics converts pmetric.Metrics to Prometheus remote write format. -func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*prompb.TimeSeries, errs error) { - tsMap = make(map[string]*prompb.TimeSeries) +func FromMetrics(md pmetric.Metrics, settings Settings) (map[string]*prompb.TimeSeries, error) { + c := newPrometheusConverter() + errs := c.fromMetrics(md, settings) + tss := c.timeSeries() + out := make(map[string]*prompb.TimeSeries, len(tss)) + for i := range tss { + out[strconv.Itoa(i)] = &tss[i] + } + return out, errs +} + +// prometheusConverter converts from OTel write format to Prometheus write format. +type prometheusConverter struct { + unique map[uint64]*prompb.TimeSeries + conflicts map[uint64][]*prompb.TimeSeries +} + +func newPrometheusConverter() *prometheusConverter { + return &prometheusConverter{ + unique: map[uint64]*prompb.TimeSeries{}, + conflicts: map[uint64][]*prompb.TimeSeries{}, + } +} + +// fromMetrics converts pmetric.Metrics to Prometheus remote write format. 
+func (c *prometheusConverter) fromMetrics(md pmetric.Metrics, settings Settings) (errs error) { resourceMetricsSlice := md.ResourceMetrics() for i := 0; i < resourceMetricsSlice.Len(); i++ { resourceMetrics := resourceMetricsSlice.At(i) @@ -39,8 +76,7 @@ func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*promp // use with the "target" info metric var mostRecentTimestamp pcommon.Timestamp for j := 0; j < scopeMetricsSlice.Len(); j++ { - scopeMetrics := scopeMetricsSlice.At(j) - metricSlice := scopeMetrics.Metrics() + metricSlice := scopeMetricsSlice.At(j).Metrics() // TODO: decide if instrumentation library information should be exported as labels for k := 0; k < metricSlice.Len(); k++ { @@ -54,65 +90,125 @@ func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*promp promName := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes) - // handle individual metric based on type + // handle individual metrics based on type //exhaustive:enforce switch metric.Type() { case pmetric.MetricTypeGauge: dataPoints := metric.Gauge().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - addSingleGaugeNumberDataPoint(dataPoints.At(x), resource, metric, settings, tsMap, promName) - } + c.addGaugeNumberDataPoints(dataPoints, resource, settings, promName) case pmetric.MetricTypeSum: dataPoints := metric.Sum().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - addSingleSumNumberDataPoint(dataPoints.At(x), resource, metric, settings, tsMap, promName) - } + c.addSumNumberDataPoints(dataPoints, resource, metric, settings, promName) case pmetric.MetricTypeHistogram: dataPoints := metric.Histogram().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - addSingleHistogramDataPoint(dataPoints.At(x), resource, metric, settings, tsMap, promName) - } + c.addHistogramDataPoints(dataPoints, resource, settings, promName) case pmetric.MetricTypeExponentialHistogram: dataPoints := metric.ExponentialHistogram().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - errs = multierr.Append( - errs, - addSingleExponentialHistogramDataPoint( - promName, - dataPoints.At(x), - resource, - settings, - tsMap, - ), - ) - } + errs = multierr.Append(errs, c.addExponentialHistogramDataPoints( + dataPoints, + resource, + settings, + promName, + )) case pmetric.MetricTypeSummary: dataPoints := metric.Summary().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. 
%s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - addSingleSummaryDataPoint(dataPoints.At(x), resource, metric, settings, tsMap, promName) - } + c.addSummaryDataPoints(dataPoints, resource, settings, promName) default: errs = multierr.Append(errs, errors.New("unsupported metric type")) } } } - addResourceTargetInfo(resource, settings, mostRecentTimestamp, tsMap) + addResourceTargetInfo(resource, settings, mostRecentTimestamp, c) } return } + +// timeSeries returns a slice of the prompb.TimeSeries that were converted from OTel format. +func (c *prometheusConverter) timeSeries() []prompb.TimeSeries { + conflicts := 0 + for _, ts := range c.conflicts { + conflicts += len(ts) + } + allTS := make([]prompb.TimeSeries, 0, len(c.unique)+conflicts) + for _, ts := range c.unique { + allTS = append(allTS, *ts) + } + for _, cTS := range c.conflicts { + for _, ts := range cTS { + allTS = append(allTS, *ts) + } + } + + return allTS +} + +func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool { + if len(ts.Labels) != len(lbls) { + return false + } + for i, l := range ts.Labels { + if l.Name != ts.Labels[i].Name || l.Value != ts.Labels[i].Value { + return false + } + } + return true +} + +// addExemplars adds exemplars for the dataPoint. For each exemplar, if it can find a bucket bound corresponding to its value, +// the exemplar is added to the bucket bound's time series, provided that the time series' has samples. +func (c *prometheusConverter) addExemplars(dataPoint pmetric.HistogramDataPoint, bucketBounds []bucketBoundsData) { + if len(bucketBounds) == 0 { + return + } + + exemplars := getPromExemplars(dataPoint) + if len(exemplars) == 0 { + return + } + + sort.Sort(byBucketBoundsData(bucketBounds)) + for _, exemplar := range exemplars { + for _, bound := range bucketBounds { + if len(bound.ts.Samples) > 0 && exemplar.Value <= bound.bound { + bound.ts.Exemplars = append(bound.ts.Exemplars, exemplar) + break + } + } + } +} + +// addSample finds a TimeSeries that corresponds to lbls, and adds sample to it. +// If there is no corresponding TimeSeries already, it's created. +// The corresponding TimeSeries is returned. +// If either lbls is nil/empty or sample is nil, nothing is done. +func (c *prometheusConverter) addSample(sample *prompb.Sample, lbls []prompb.Label) *prompb.TimeSeries { + if sample == nil || len(lbls) == 0 { + // This shouldn't happen + return nil + } + + ts, _ := c.getOrCreateTimeSeries(lbls) + ts.Samples = append(ts.Samples, *sample) + return ts +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go index b5bd8765f..75c3d9845 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go @@ -1,106 +1,110 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
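One caveat worth flagging in the new `isSameMetric` above: as written, the loop compares `ts.Labels[i]` with itself (`l` is `ts.Labels[i]`), so the condition can never fire and any two label sets of equal length are treated as identical. A corrected sketch of the presumably intended comparison, assuming both slices are built in the same label order as the converter does:

```go
func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool {
	if len(ts.Labels) != len(lbls) {
		return false
	}
	for i, l := range ts.Labels {
		// Compare against the candidate labels, not the series' own labels.
		if l.Name != lbls[i].Name || l.Value != lbls[i].Value {
			return false
		}
	}
	return true
}
```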
+// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/number_data_points.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( "math" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/value" - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/prompb" ) -// addSingleGaugeNumberDataPoint converts the Gauge metric data point to a -// Prometheus time series with samples and labels. The result is stored in the -// series map. -func addSingleGaugeNumberDataPoint( - pt pmetric.NumberDataPoint, - resource pcommon.Resource, - metric pmetric.Metric, - settings Settings, - series map[string]*prompb.TimeSeries, - name string, -) { - labels := createAttributes( - resource, - pt.Attributes(), - settings.ExternalLabels, - model.MetricNameLabel, - name, - ) - sample := &prompb.Sample{ - // convert ns to ms - Timestamp: convertTimeStamp(pt.Timestamp()), +func (c *prometheusConverter) addGaugeNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, + resource pcommon.Resource, settings Settings, name string) { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + labels := createAttributes( + resource, + pt.Attributes(), + settings.ExternalLabels, + nil, + true, + model.MetricNameLabel, + name, + ) + sample := &prompb.Sample{ + // convert ns to ms + Timestamp: convertTimeStamp(pt.Timestamp()), + } + switch pt.ValueType() { + case pmetric.NumberDataPointValueTypeInt: + sample.Value = float64(pt.IntValue()) + case pmetric.NumberDataPointValueTypeDouble: + sample.Value = pt.DoubleValue() + } + if pt.Flags().NoRecordedValue() { + sample.Value = math.Float64frombits(value.StaleNaN) + } + c.addSample(sample, labels) } - switch pt.ValueType() { - case pmetric.NumberDataPointValueTypeInt: - sample.Value = float64(pt.IntValue()) - case pmetric.NumberDataPointValueTypeDouble: - sample.Value = pt.DoubleValue() - } - if pt.Flags().NoRecordedValue() { - sample.Value = math.Float64frombits(value.StaleNaN) - } - addSample(series, sample, labels, metric.Type().String()) } -// addSingleSumNumberDataPoint converts the Sum metric data point to a Prometheus -// time series with samples, labels and exemplars. The result is stored in the -// series map. 
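A note on the `math.Float64frombits(value.StaleNaN)` pattern that recurs throughout these hunks: it encodes Prometheus's staleness marker, a specific NaN bit pattern distinct from an ordinary NaN, which is how OTel's `NoRecordedValue` flag is represented. A small self-contained sketch:

```go
package main

import (
	"fmt"
	"math"

	"github.com/prometheus/prometheus/model/value"
)

func main() {
	// Encode a staleness marker the same way the translation code does.
	stale := math.Float64frombits(value.StaleNaN)

	fmt.Println(value.IsStaleNaN(stale))      // true: the stale bit pattern
	fmt.Println(value.IsStaleNaN(math.NaN())) // false: an ordinary NaN
}
```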
-func addSingleSumNumberDataPoint( - pt pmetric.NumberDataPoint, - resource pcommon.Resource, - metric pmetric.Metric, - settings Settings, - series map[string]*prompb.TimeSeries, - name string, -) { - labels := createAttributes( - resource, - pt.Attributes(), - settings.ExternalLabels, - model.MetricNameLabel, name, - ) - sample := &prompb.Sample{ - // convert ns to ms - Timestamp: convertTimeStamp(pt.Timestamp()), - } - switch pt.ValueType() { - case pmetric.NumberDataPointValueTypeInt: - sample.Value = float64(pt.IntValue()) - case pmetric.NumberDataPointValueTypeDouble: - sample.Value = pt.DoubleValue() - } - if pt.Flags().NoRecordedValue() { - sample.Value = math.Float64frombits(value.StaleNaN) - } - sig := addSample(series, sample, labels, metric.Type().String()) - - if ts := series[sig]; sig != "" && ts != nil { - exemplars := getPromExemplars[pmetric.NumberDataPoint](pt) - ts.Exemplars = append(ts.Exemplars, exemplars...) - } - - // add _created time series if needed - if settings.ExportCreatedMetric && metric.Sum().IsMonotonic() { - startTimestamp := pt.StartTimestamp() - if startTimestamp == 0 { - return +func (c *prometheusConverter) addSumNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, + resource pcommon.Resource, metric pmetric.Metric, settings Settings, name string) { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + lbls := createAttributes( + resource, + pt.Attributes(), + settings.ExternalLabels, + nil, + true, + model.MetricNameLabel, + name, + ) + sample := &prompb.Sample{ + // convert ns to ms + Timestamp: convertTimeStamp(pt.Timestamp()), + } + switch pt.ValueType() { + case pmetric.NumberDataPointValueTypeInt: + sample.Value = float64(pt.IntValue()) + case pmetric.NumberDataPointValueTypeDouble: + sample.Value = pt.DoubleValue() + } + if pt.Flags().NoRecordedValue() { + sample.Value = math.Float64frombits(value.StaleNaN) + } + ts := c.addSample(sample, lbls) + if ts != nil { + exemplars := getPromExemplars[pmetric.NumberDataPoint](pt) + ts.Exemplars = append(ts.Exemplars, exemplars...) } - createdLabels := make([]prompb.Label, len(labels)) - copy(createdLabels, labels) - for i, l := range createdLabels { - if l.Name == model.MetricNameLabel { - createdLabels[i].Value = name + createdSuffix - break + // add created time series if needed + if settings.ExportCreatedMetric && metric.Sum().IsMonotonic() { + startTimestamp := pt.StartTimestamp() + if startTimestamp == 0 { + return } + + createdLabels := make([]prompb.Label, len(lbls)) + copy(createdLabels, lbls) + for i, l := range createdLabels { + if l.Name == model.MetricNameLabel { + createdLabels[i].Value = name + createdSuffix + break + } + } + c.addTimeSeriesIfNeeded(createdLabels, startTimestamp, pt.Timestamp()) } - addCreatedTimeSeriesIfNeeded(series, createdLabels, startTimestamp, pt.Timestamp(), metric.Type().String()) } } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go b/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go index e43797212..ba4870419 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go @@ -1,14 +1,25 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/otlp_to_openmetrics_metadata.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pmetric" + "github.com/prometheus/prometheus/prompb" prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) diff --git a/storage/remote/otlptranslator/update-copy.sh b/storage/remote/otlptranslator/update-copy.sh deleted file mode 100755 index 8aa645e0b..000000000 --- a/storage/remote/otlptranslator/update-copy.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -set -xe - -OTEL_VERSION=v0.95.0 - -git clone https://github.com/open-telemetry/opentelemetry-collector-contrib ./tmp -cd ./tmp -git checkout $OTEL_VERSION -cd .. - -rm -rf ./prometheusremotewrite/* -cp -r ./tmp/pkg/translator/prometheusremotewrite/*.go ./prometheusremotewrite -rm -rf ./prometheusremotewrite/*_test.go - -rm -rf ./prometheus/* -cp -r ./tmp/pkg/translator/prometheus/*.go ./prometheus -rm -rf ./prometheus/*_test.go - -rm -rf ./tmp - -case $(sed --help 2>&1) in - *GNU*) set sed -i;; - *) set sed -i '';; -esac - -"$@" -e 's#github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus#github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus#g' ./prometheusremotewrite/*.go ./prometheus/*.go -"$@" -e '1s#^#// DO NOT EDIT. COPIED AS-IS. 
SEE ../README.md\n\n#g' ./prometheusremotewrite/*.go ./prometheus/*.go From 0cd3a22a1844ca06ece00e8b99fe534f2ca1b173 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Tue, 30 Apr 2024 17:35:25 +0800 Subject: [PATCH 031/112] docs: [ovh sd] Added missing label for OVH dedicated server in service discovery doc Signed-off-by: Jiekun --- docs/configuration/configuration.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 2f2e07a0c..746e9123e 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -1467,6 +1467,7 @@ For OVHcloud's [public cloud instances](https://www.ovhcloud.com/en/public-cloud * `__meta_ovhcloud_dedicated_server_ipv6`: the IPv6 of the server * `__meta_ovhcloud_dedicated_server_link_speed`: the link speed of the server * `__meta_ovhcloud_dedicated_server_name`: the name of the server +* `__meta_ovhcloud_dedicated_server_no_intervention`: whether datacenter intervention is disabled for the server * `__meta_ovhcloud_dedicated_server_os`: the operating system of the server * `__meta_ovhcloud_dedicated_server_rack`: the rack of the server * `__meta_ovhcloud_dedicated_server_reverse`: the reverse DNS name of the server From 99f3051f45ef43735034802e2b56673fe40b2276 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 30 Apr 2024 11:37:00 +0200 Subject: [PATCH 032/112] OTLP: Use PrometheusConverter directly Signed-off-by: Arve Knudsen --- CHANGELOG.md | 1 + .../prometheusremotewrite/helper.go | 10 +++--- .../prometheusremotewrite/histograms.go | 2 +- .../prometheusremotewrite/metrics_to_prw.go | 34 ++++++------------- .../number_data_points.go | 4 +-- storage/remote/write_handler.go | 16 +++------ 6 files changed, 24 insertions(+), 43 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23d2c89da..5cc5b8f40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ * [CHANGE] TSDB: Fix the predicate checking for blocks which are beyond the retention period to include the ones right at the retention boundary. #9633 * [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974 +* [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. 
#14006 #13991 ## 2.51.2 / 2024-04-09 diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index de228e807..68be82e44 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -219,7 +219,7 @@ func isValidAggregationTemporality(metric pmetric.Metric) bool { return false } -func (c *prometheusConverter) addHistogramDataPoints(dataPoints pmetric.HistogramDataPointSlice, +func (c *PrometheusConverter) addHistogramDataPoints(dataPoints pmetric.HistogramDataPointSlice, resource pcommon.Resource, settings Settings, baseName string) { for x := 0; x < dataPoints.Len(); x++ { pt := dataPoints.At(x) @@ -395,7 +395,7 @@ func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp { return ts } -func (c *prometheusConverter) addSummaryDataPoints(dataPoints pmetric.SummaryDataPointSlice, resource pcommon.Resource, +func (c *PrometheusConverter) addSummaryDataPoints(dataPoints pmetric.SummaryDataPointSlice, resource pcommon.Resource, settings Settings, baseName string) { for x := 0; x < dataPoints.Len(); x++ { pt := dataPoints.At(x) @@ -468,7 +468,7 @@ func createLabels(name string, baseLabels []prompb.Label, extras ...string) []pr // getOrCreateTimeSeries returns the time series corresponding to the label set if existent, and false. // Otherwise it creates a new one and returns that, and true. -func (c *prometheusConverter) getOrCreateTimeSeries(lbls []prompb.Label) (*prompb.TimeSeries, bool) { +func (c *PrometheusConverter) getOrCreateTimeSeries(lbls []prompb.Label) (*prompb.TimeSeries, bool) { h := timeSeriesSignature(lbls) ts := c.unique[h] if ts != nil { @@ -504,7 +504,7 @@ func (c *prometheusConverter) getOrCreateTimeSeries(lbls []prompb.Label) (*promp // addTimeSeriesIfNeeded adds a corresponding time series if it doesn't already exist. // If the time series doesn't already exist, it gets added with startTimestamp for its value and timestamp for its timestamp, // both converted to milliseconds. -func (c *prometheusConverter) addTimeSeriesIfNeeded(lbls []prompb.Label, startTimestamp pcommon.Timestamp, timestamp pcommon.Timestamp) { +func (c *PrometheusConverter) addTimeSeriesIfNeeded(lbls []prompb.Label, startTimestamp pcommon.Timestamp, timestamp pcommon.Timestamp) { ts, created := c.getOrCreateTimeSeries(lbls) if created { ts.Samples = []prompb.Sample{ @@ -518,7 +518,7 @@ func (c *prometheusConverter) addTimeSeriesIfNeeded(lbls []prompb.Label, startTi } // addResourceTargetInfo converts the resource to the target info metric. 
-func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timestamp pcommon.Timestamp, converter *prometheusConverter) { +func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timestamp pcommon.Timestamp, converter *PrometheusConverter) { if settings.DisableTargetInfo || timestamp == 0 { return } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go index 45f1df123..31d343fe4 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go @@ -30,7 +30,7 @@ import ( const defaultZeroThreshold = 1e-128 -func (c *prometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice, +func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice, resource pcommon.Resource, settings Settings, baseName string) error { for x := 0; x < dataPoints.Len(); x++ { pt := dataPoints.At(x) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index a108306ba..2d6aa30a1 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -20,7 +20,6 @@ import ( "errors" "fmt" "sort" - "strconv" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" @@ -39,34 +38,21 @@ type Settings struct { SendMetadata bool } -// FromMetrics converts pmetric.Metrics to Prometheus remote write format. -func FromMetrics(md pmetric.Metrics, settings Settings) (map[string]*prompb.TimeSeries, error) { - c := newPrometheusConverter() - errs := c.fromMetrics(md, settings) - tss := c.timeSeries() - out := make(map[string]*prompb.TimeSeries, len(tss)) - for i := range tss { - out[strconv.Itoa(i)] = &tss[i] - } - - return out, errs -} - -// prometheusConverter converts from OTel write format to Prometheus write format. -type prometheusConverter struct { +// PrometheusConverter converts from OTel write format to Prometheus write format. +type PrometheusConverter struct { unique map[uint64]*prompb.TimeSeries conflicts map[uint64][]*prompb.TimeSeries } -func newPrometheusConverter() *prometheusConverter { - return &prometheusConverter{ +func NewPrometheusConverter() *PrometheusConverter { + return &PrometheusConverter{ unique: map[uint64]*prompb.TimeSeries{}, conflicts: map[uint64][]*prompb.TimeSeries{}, } } -// fromMetrics converts pmetric.Metrics to Prometheus remote write format. -func (c *prometheusConverter) fromMetrics(md pmetric.Metrics, settings Settings) (errs error) { +// FromMetrics converts pmetric.Metrics to Prometheus remote write format. +func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (errs error) { resourceMetricsSlice := md.ResourceMetrics() for i := 0; i < resourceMetricsSlice.Len(); i++ { resourceMetrics := resourceMetricsSlice.At(i) @@ -144,8 +130,8 @@ func (c *prometheusConverter) fromMetrics(md pmetric.Metrics, settings Settings) return } -// timeSeries returns a slice of the prompb.TimeSeries that were converted from OTel format. -func (c *prometheusConverter) timeSeries() []prompb.TimeSeries { +// TimeSeries returns a slice of the prompb.TimeSeries that were converted from OTel format. 
+func (c *PrometheusConverter) TimeSeries() []prompb.TimeSeries { conflicts := 0 for _, ts := range c.conflicts { conflicts += len(ts) @@ -177,7 +163,7 @@ func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool { // addExemplars adds exemplars for the dataPoint. For each exemplar, if it can find a bucket bound corresponding to its value, // the exemplar is added to the bucket bound's time series, provided that the time series' has samples. -func (c *prometheusConverter) addExemplars(dataPoint pmetric.HistogramDataPoint, bucketBounds []bucketBoundsData) { +func (c *PrometheusConverter) addExemplars(dataPoint pmetric.HistogramDataPoint, bucketBounds []bucketBoundsData) { if len(bucketBounds) == 0 { return } @@ -202,7 +188,7 @@ func (c *prometheusConverter) addExemplars(dataPoint pmetric.HistogramDataPoint, // If there is no corresponding TimeSeries already, it's created. // The corresponding TimeSeries is returned. // If either lbls is nil/empty or sample is nil, nothing is done. -func (c *prometheusConverter) addSample(sample *prompb.Sample, lbls []prompb.Label) *prompb.TimeSeries { +func (c *PrometheusConverter) addSample(sample *prompb.Sample, lbls []prompb.Label) *prompb.TimeSeries { if sample == nil || len(lbls) == 0 { // This shouldn't happen return nil diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go index 75c3d9845..aafebc6c4 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go @@ -27,7 +27,7 @@ import ( "github.com/prometheus/prometheus/prompb" ) -func (c *prometheusConverter) addGaugeNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, +func (c *PrometheusConverter) addGaugeNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, resource pcommon.Resource, settings Settings, name string) { for x := 0; x < dataPoints.Len(); x++ { pt := dataPoints.At(x) @@ -57,7 +57,7 @@ func (c *prometheusConverter) addGaugeNumberDataPoints(dataPoints pmetric.Number } } -func (c *prometheusConverter) addSumNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, +func (c *PrometheusConverter) addSumNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, resource pcommon.Resource, metric pmetric.Metric, settings Settings, name string) { for x := 0; x < dataPoints.Len(); x++ { pt := dataPoints.At(x) diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index bb6b8423a..ff227292b 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -208,21 +208,15 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } - prwMetricsMap, errs := otlptranslator.FromMetrics(req.Metrics(), otlptranslator.Settings{ + converter := otlptranslator.NewPrometheusConverter() + if err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{ AddMetricSuffixes: true, - }) - if errs != nil { - level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", errs) - } - - prwMetrics := make([]prompb.TimeSeries, 0, len(prwMetricsMap)) - - for _, ts := range prwMetricsMap { - prwMetrics = append(prwMetrics, *ts) + }); err != nil { + level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err) } err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{ - Timeseries: prwMetrics, + Timeseries: converter.TimeSeries(), 
}) switch { From 918950756988fde81d5315eeed7c628e963cde76 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 30 Apr 2024 11:56:35 +0200 Subject: [PATCH 033/112] prometheusremotewrite: Add PrometheusConverter.FromMetrics benchmark Signed-off-by: Arve Knudsen --- .../metrics_to_prw_test.go | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go new file mode 100644 index 000000000..4797daeec --- /dev/null +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go @@ -0,0 +1,134 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/metrics_to_prw_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package prometheusremotewrite + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp" +) + +func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) { + for _, resourceAttributeCount := range []int{0, 5, 50} { + b.Run(fmt.Sprintf("resource attribute count: %v", resourceAttributeCount), func(b *testing.B) { + for _, histogramCount := range []int{0, 1000} { + b.Run(fmt.Sprintf("histogram count: %v", histogramCount), func(b *testing.B) { + nonHistogramCounts := []int{0, 1000} + + if resourceAttributeCount == 0 && histogramCount == 0 { + // Don't bother running a scenario where we'll generate no series. 
+ nonHistogramCounts = []int{1000} + } + + for _, nonHistogramCount := range nonHistogramCounts { + b.Run(fmt.Sprintf("non-histogram count: %v", nonHistogramCount), func(b *testing.B) { + for _, labelsPerMetric := range []int{2, 20} { + b.Run(fmt.Sprintf("labels per metric: %v", labelsPerMetric), func(b *testing.B) { + for _, exemplarsPerSeries := range []int{0, 5, 10} { + b.Run(fmt.Sprintf("exemplars per series: %v", exemplarsPerSeries), func(b *testing.B) { + payload := createExportRequest(resourceAttributeCount, histogramCount, nonHistogramCount, labelsPerMetric, exemplarsPerSeries) + + for i := 0; i < b.N; i++ { + converter := NewPrometheusConverter() + require.NoError(b, converter.FromMetrics(payload.Metrics(), Settings{})) + require.NotNil(b, converter.TimeSeries()) + } + }) + } + }) + } + }) + } + }) + } + }) + } +} + +func createExportRequest(resourceAttributeCount int, histogramCount int, nonHistogramCount int, labelsPerMetric int, exemplarsPerSeries int) pmetricotlp.ExportRequest { + request := pmetricotlp.NewExportRequest() + + rm := request.Metrics().ResourceMetrics().AppendEmpty() + generateAttributes(rm.Resource().Attributes(), "resource", resourceAttributeCount) + + metrics := rm.ScopeMetrics().AppendEmpty().Metrics() + ts := pcommon.NewTimestampFromTime(time.Now()) + + for i := 1; i <= histogramCount; i++ { + m := metrics.AppendEmpty() + m.SetEmptyHistogram() + m.SetName(fmt.Sprintf("histogram-%v", i)) + m.Histogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + h := m.Histogram().DataPoints().AppendEmpty() + h.SetTimestamp(ts) + + // Set 50 samples, 10 each with values 0.5, 1, 2, 4, and 8 + h.SetCount(50) + h.SetSum(155) + h.BucketCounts().FromRaw([]uint64{10, 10, 10, 10, 10, 0}) + h.ExplicitBounds().FromRaw([]float64{.5, 1, 2, 4, 8, 16}) // Bucket boundaries include the upper limit (ie. 
each sample is on the upper limit of its bucket) + + generateAttributes(h.Attributes(), "series", labelsPerMetric) + generateExemplars(h.Exemplars(), exemplarsPerSeries, ts) + } + + for i := 1; i <= nonHistogramCount; i++ { + m := metrics.AppendEmpty() + m.SetEmptySum() + m.SetName(fmt.Sprintf("sum-%v", i)) + m.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + point := m.Sum().DataPoints().AppendEmpty() + point.SetTimestamp(ts) + point.SetDoubleValue(1.23) + generateAttributes(point.Attributes(), "series", labelsPerMetric) + generateExemplars(point.Exemplars(), exemplarsPerSeries, ts) + } + + for i := 1; i <= nonHistogramCount; i++ { + m := metrics.AppendEmpty() + m.SetEmptyGauge() + m.SetName(fmt.Sprintf("gauge-%v", i)) + point := m.Gauge().DataPoints().AppendEmpty() + point.SetTimestamp(ts) + point.SetDoubleValue(1.23) + generateAttributes(point.Attributes(), "series", labelsPerMetric) + generateExemplars(point.Exemplars(), exemplarsPerSeries, ts) + } + + return request +} + +func generateAttributes(m pcommon.Map, prefix string, count int) { + for i := 1; i <= count; i++ { + m.PutStr(fmt.Sprintf("%v-name-%v", prefix, i), fmt.Sprintf("value-%v", i)) + } +} + +func generateExemplars(exemplars pmetric.ExemplarSlice, count int, ts pcommon.Timestamp) { + for i := 1; i <= count; i++ { + e := exemplars.AppendEmpty() + e.SetTimestamp(ts) + e.SetDoubleValue(2.22) + e.SetSpanID(pcommon.SpanID{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}) + e.SetTraceID(pcommon.TraceID{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}) + } +} From 151f6e0ed6e9e60c0c667da294c099bc3ece8047 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 30 Apr 2024 12:17:56 +0100 Subject: [PATCH 034/112] Add an assertion on the count of alerts before adding an active alert Signed-off-by: gotjosh --- rules/alerting_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rules/alerting_test.go b/rules/alerting_test.go index a3f97b2a4..28b4f2cdc 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -1038,6 +1038,8 @@ func TestAlertingRule_ActiveAlertsCount(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, ) + require.Equal(t, 0, rule.ActiveAlertsCount()) + // Set an active alert. lbls := labels.FromStrings("a1", "1") h := lbls.Hash() From ccfafae36d4a0d49a4b099fafbe56eb66ffcb6bb Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 30 Apr 2024 12:19:18 +0100 Subject: [PATCH 035/112] Rename QueryforStateSeries to QueryForStateSeries Signed-off-by: gotjosh --- rules/alerting.go | 4 ++-- rules/alerting_test.go | 2 +- rules/group.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rules/alerting.go b/rules/alerting.go index e357cc615..fb0fb35b1 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -268,8 +268,8 @@ func (r *AlertingRule) forStateSample(alert *Alert, ts time.Time, v float64) pro return s } -// QueryforStateSeries returns the series for ALERTS_FOR_STATE of the alert rule. -func (r *AlertingRule) QueryforStateSeries(ctx context.Context, q storage.Querier) (storage.SeriesSet, error) { +// QueryForStateSeries returns the series for ALERTS_FOR_STATE of the alert rule. +func (r *AlertingRule) QueryForStateSeries(ctx context.Context, q storage.Querier) (storage.SeriesSet, error) { // We use a sample to ease the building of matchers. // Don't provide an alert as we want matchers that match all series for the alert rule. 
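For readers unfamiliar with the restore mechanism, a brief hedged illustration of what these matchers end up selecting (label values invented):

```go
// ALERTS_FOR_STATE is the synthetic series Prometheus writes for every
// pending/firing alert; its sample value is the alert's ActiveAt timestamp
// in seconds. A restored series might look like:
//
//   ALERTS_FOR_STATE{alertname="HighErrorRate", job="api"}
//
// Because forStateSample is called with a nil alert here, the resulting
// matchers carry only the rule's own identity (metric name, alertname, rule
// labels), so one Select matches the state series of every active alert of
// the rule at once.
```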
 	smpl := r.forStateSample(nil, time.Now(), 0)

diff --git a/rules/alerting_test.go b/rules/alerting_test.go
index 28b4f2cdc..165f38b86 100644
--- a/rules/alerting_test.go
+++ b/rules/alerting_test.go
@@ -716,7 +716,7 @@ func TestQueryForStateSeries(t *testing.T) {
 		matchersCount++
 	})
 
-	seriesSet, err := rule.QueryforStateSeries(context.Background(), querier)
+	seriesSet, err := rule.QueryForStateSeries(context.Background(), querier)
 
 	var series storage.Series
 	for seriesSet.Next() {
diff --git a/rules/group.go b/rules/group.go
index 28a0ff6e1..e8a7d82d2 100644
--- a/rules/group.go
+++ b/rules/group.go
@@ -664,7 +664,7 @@ func (g *Group) RestoreForState(ts time.Time) {
 			continue
 		}
 
-		sset, err := alertRule.QueryforStateSeries(g.opts.Context, q)
+		sset, err := alertRule.QueryForStateSeries(g.opts.Context, q)
 		if err != nil {
 			level.Error(g.logger).Log(
 				"msg", "Failed to restore 'for' state",

From 63b09944b8502a64cb17330161c8520d24874aa4 Mon Sep 17 00:00:00 2001
From: gotjosh 
Date: Tue, 30 Apr 2024 12:25:48 +0100
Subject: [PATCH 036/112] Use labels.Len() instead of manually counting the
 labels

Signed-off-by: gotjosh 

---
 rules/alerting_test.go | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/rules/alerting_test.go b/rules/alerting_test.go
index 165f38b86..5fae3edd1 100644
--- a/rules/alerting_test.go
+++ b/rules/alerting_test.go
@@ -711,16 +711,12 @@ func TestQueryForStateSeries(t *testing.T) {
 	)
 
 	sample := rule.forStateSample(nil, time.Time{}, 0)
-	var matchersCount int
-	sample.Metric.Range(func(l labels.Label) {
-		matchersCount++
-	})
 
 	seriesSet, err := rule.QueryForStateSeries(context.Background(), querier)
 
 	var series storage.Series
 	for seriesSet.Next() {
-		if seriesSet.At().Labels().Len() == matchersCount {
+		if seriesSet.At().Labels().Len() == sample.Metric.Len() {
 			series = seriesSet.At()
 			break
 		}

From f63dbc3db2abee643c7e182690be5712c42f13ca Mon Sep 17 00:00:00 2001
From: gotjosh 
Date: Tue, 30 Apr 2024 12:39:07 +0100
Subject: [PATCH 037/112] Remove duplicated sort and assignment of expected
 alerts.

Signed-off-by: gotjosh 

---
 rules/manager_test.go | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/rules/manager_test.go b/rules/manager_test.go
index a3bd335d1..d74a7dfe1 100644
--- a/rules/manager_test.go
+++ b/rules/manager_test.go
@@ -397,14 +397,6 @@ func TestForStateRestore(t *testing.T) {
 		group.Eval(context.TODO(), evalTime)
 	}
 
-	exp := rule.ActiveAlerts()
-	for _, aa := range exp {
-		require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels)
-	}
-	sort.Slice(exp, func(i, j int) bool {
-		return labels.Compare(exp[i].Labels, exp[j].Labels) < 0
-	})
-
 	// Prometheus goes down here. We create new rules and groups.

From 05ca082b079bb12dc7ad0f8c3940774df803401b Mon Sep 17 00:00:00 2001
From: gotjosh 
Date: Tue, 30 Apr 2024 12:43:09 +0100
Subject: [PATCH 038/112] Rename `alerts` to `expectedAlerts` in the test case
 input

Signed-off-by: gotjosh 

---
 rules/manager_test.go | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/rules/manager_test.go b/rules/manager_test.go
index d74a7dfe1..07159145f 100644
--- a/rules/manager_test.go
+++ b/rules/manager_test.go
@@ -401,7 +401,7 @@ func TestForStateRestore(t *testing.T) {
type testInput struct { name string From 05ca082b079bb12dc7ad0f8c3940774df803401b Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 30 Apr 2024 12:43:09 +0100 Subject: [PATCH 038/112] Rename `alerts` to `expectedAlerts` in the test case input Signed-off-by: gotjosh --- rules/manager_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rules/manager_test.go b/rules/manager_test.go index d74a7dfe1..07159145f 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -401,7 +401,7 @@ func TestForStateRestore(t *testing.T) { type testInput struct { name string restoreDuration time.Duration - alerts []*Alert + expectedAlerts []*Alert num int noRestore bool @@ -414,7 +414,7 @@ func TestForStateRestore(t *testing.T) { { name: "normal restore (alerts were not firing)", restoreDuration: 15 * time.Minute, - alerts: rule.ActiveAlerts(), + expectedAlerts: rule.ActiveAlerts(), downDuration: 10 * time.Minute, }, { @@ -426,12 +426,12 @@ func TestForStateRestore(t *testing.T) { { name: "no active alerts", restoreDuration: 50 * time.Minute, - alerts: []*Alert{}, + expectedAlerts: []*Alert{}, }, { name: "test the grace period", restoreDuration: 25 * time.Minute, - alerts: []*Alert{}, + expectedAlerts: []*Alert{}, gracePeriod: true, before: func() { for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { @@ -496,7 +496,7 @@ func TestForStateRestore(t *testing.T) { require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) } default: - exp := tt.alerts + exp := tt.expectedAlerts require.Equal(t, len(exp), len(got)) sortAlerts(exp) sortAlerts(got) From 379dec9d368edbf083b5a7da9e8aa02e94bb079b Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 30 Apr 2024 13:09:30 +0100 Subject: [PATCH 039/112] querier.Select cannot return a nil series set. Signed-off-by: gotjosh --- rules/group.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rules/group.go b/rules/group.go index e8a7d82d2..3a97c9564 100644 --- a/rules/group.go +++ b/rules/group.go @@ -675,18 +675,18 @@ func (g *Group) RestoreForState(ts time.Time) { continue } - // No results for this alert rule. - if sset == nil { - level.Debug(g.logger).Log("msg", "Failed to find a series to restore the 'for' state", labels.AlertName, alertRule.Name()) - continue - } - // While not technically the same number of series we expect, it's as good of an approximation as any. seriesByLabels := make(map[string]storage.Series, alertRule.ActiveAlertsCount()) for sset.Next() { seriesByLabels[sset.At().Labels().DropMetricName().String()] = sset.At() } + // No results for this alert rule. + if len(seriesByLabels) == 0 { + level.Debug(g.logger).Log("msg", "Failed to find a series to restore the 'for' state", labels.AlertName, alertRule.Name()) + continue + } + alertRule.ForEachActiveAlert(func(a *Alert) { var s storage.Series From 965f507db506f90713aebfd8da46e27feb018ab9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Tue, 30 Apr 2024 16:22:16 +0200 Subject: [PATCH 040/112] ci: check generated parser code before running unit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check that the generated parser code is consistent with the input definition. Remove the file before re-generating to make sure that missing goyacc is not effecting the check. 
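To illustrate why deleting the file first matters, here is a minimal Go sketch of the same check (an illustration only, not part of the patch; the file path and the goyacc and git invocations are taken from the Make targets in the diff below). If goyacc is not installed, the regeneration step fails loudly instead of leaving the stale, previously generated file in place, which would make the diff check pass vacuously.

```go
// Sketch only: regenerate the parser and fail if the committed file differs.
package main

import (
	"log"
	"os"
	"os/exec"
)

func main() {
	const gen = "promql/parser/generated_parser.y.go"
	// Delete first, so a missing goyacc binary cannot silently "pass" by
	// leaving the previously generated file untouched.
	if err := os.Remove(gen); err != nil && !os.IsNotExist(err) {
		log.Fatal(err)
	}
	if out, err := exec.Command("goyacc", "-l", "-o", gen, "promql/parser/generated_parser.y").CombinedOutput(); err != nil {
		log.Fatalf("goyacc failed: %v\n%s", err, out)
	}
	if err := exec.Command("git", "diff", "--exit-code", "--", gen).Run(); err != nil {
		log.Fatal("generated parser is out of date; run 'make parser' and commit the changes")
	}
}
```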
Fixes: #7488 Signed-off-by: György Krajcsovits --- .github/workflows/ci.yml | 13 +++++++++++++ Makefile | 27 +++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 714faf167..41043f6c6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -143,6 +143,19 @@ jobs: with: parallelism: 12 thread: ${{ matrix.thread }} + check_generated_parser: + name: Check generated parser + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - name: Install Go + uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 + with: + cache: false + go-version: 1.22.x + - name: Run goyacc and check for diff + run: make install-goyacc check-generated-parser golangci: name: golangci-lint runs-on: ubuntu-latest diff --git a/Makefile b/Makefile index 61e8f4377..5dcebfd1a 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ TSDB_BENCHMARK_DATASET ?= ./tsdb/testdata/20kseries.json TSDB_BENCHMARK_OUTPUT_DIR ?= ./benchout GOLANGCI_LINT_OPTS ?= --timeout 4m +GOYACC_VERSION ?= v0.6.0 include Makefile.common @@ -78,24 +79,42 @@ assets-tarball: assets @echo '>> packaging assets' scripts/package_assets.sh -# We only want to generate the parser when there's changes to the grammar. .PHONY: parser parser: @echo ">> running goyacc to generate the .go file." ifeq (, $(shell command -v goyacc 2> /dev/null)) @echo "goyacc not installed so skipping" - @echo "To install: go install golang.org/x/tools/cmd/goyacc@v0.6.0" + @echo "To install: \"go install golang.org/x/tools/cmd/goyacc@$(GOYACC_VERSION)\" or run \"make install-goyacc\"" else - goyacc -l -o promql/parser/generated_parser.y.go promql/parser/generated_parser.y + $(MAKE) promql/parser/generated_parser.y.go endif +promql/parser/generated_parser.y.go: promql/parser/generated_parser.y + @echo ">> running goyacc to generate the .go file." + @goyacc -l -o promql/parser/generated_parser.y.go promql/parser/generated_parser.y + +.PHONY: clean-parser +clean-parser: + @echo ">> cleaning generated parser" + @rm -f promql/parser/generated_parser.y.go + +.PHONY: check-generated-parser +check-generated-parser: clean-parser promql/parser/generated_parser.y.go + @echo ">> checking generated parser" + @git diff --exit-code -- promql/parser/generated_parser.y.go || (echo "Generated parser is out of date. Please run 'make parser' and commit the changes." && false) + +.PHONY: install-goyacc +install-goyacc: + @echo ">> installing goyacc $(GOYACC_VERSION)" + @go install golang.org/x/tools/cmd/goyacc@$(GOYACC_VERSION) + .PHONY: test # If we only want to only test go code we have to change the test target # which is called by all. 
ifeq ($(GO_ONLY),1) test: common-test check-go-mod-version else -test: common-test ui-build-module ui-test ui-lint check-go-mod-version +test: check-generated-parser common-test ui-build-module ui-test ui-lint check-go-mod-version endif .PHONY: npm_licenses From 12e317786b7ac864117f4be1a88a1aa29e5dcf9e Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 30 Apr 2024 18:44:27 +0200 Subject: [PATCH 041/112] Add missing OTLP fixes to changelog (#14014) Signed-off-by: Arve Knudsen --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a4d478ca..ea5fc9127 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ * [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980 * [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974 * [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991 +* [BUGFIX] OTLP: Don't generate target_info unless at least one identifying label is defined. #13991 +* [BUGFIX] OTLP: Don't generate target_info unless there are metrics. #13991 ## 2.51.2 / 2024-04-09 From fc34570b06641cc324fd933ff118e5208fe15871 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 1 May 2024 12:37:58 +0200 Subject: [PATCH 042/112] prometheusremotewrite: Move TimeSeries method to timeseries.go To facilitate generating OTel translation code for other Prometheus compatible backends, modify the prometheusremotewrite sources slightly so that the PrometheusConverter.TimeSeries method is in a file called timeseries.go. The rationale is to allow other backends to define their own implementation of this method. Signed-off-by: Arve Knudsen --- .../prometheusremotewrite/metrics_to_prw.go | 21 +--------- .../metrics_to_prw_test.go | 2 +- .../prometheusremotewrite/timeseries.go | 41 +++++++++++++++++++ 3 files changed, 43 insertions(+), 21 deletions(-) create mode 100644 storage/remote/otlptranslator/prometheusremotewrite/timeseries.go diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index 2d6aa30a1..65dac99c5 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -38,7 +38,7 @@ type Settings struct { SendMetadata bool } -// PrometheusConverter converts from OTel write format to Prometheus write format. +// PrometheusConverter converts from OTel write format to Prometheus remote write format. type PrometheusConverter struct { unique map[uint64]*prompb.TimeSeries conflicts map[uint64][]*prompb.TimeSeries @@ -130,25 +130,6 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) return } -// TimeSeries returns a slice of the prompb.TimeSeries that were converted from OTel format. 
-func (c *PrometheusConverter) TimeSeries() []prompb.TimeSeries { - conflicts := 0 - for _, ts := range c.conflicts { - conflicts += len(ts) - } - allTS := make([]prompb.TimeSeries, 0, len(c.unique)+conflicts) - for _, ts := range c.unique { - allTS = append(allTS, *ts) - } - for _, cTS := range c.conflicts { - for _, ts := range cTS { - allTS = append(allTS, *ts) - } - } - - return allTS -} - func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool { if len(ts.Labels) != len(lbls) { return false diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go index 4797daeec..37ac67774 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go @@ -64,7 +64,7 @@ func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) { } } -func createExportRequest(resourceAttributeCount int, histogramCount int, nonHistogramCount int, labelsPerMetric int, exemplarsPerSeries int) pmetricotlp.ExportRequest { +func createExportRequest(resourceAttributeCount, histogramCount, nonHistogramCount, labelsPerMetric, exemplarsPerSeries int) pmetricotlp.ExportRequest { request := pmetricotlp.NewExportRequest() rm := request.Metrics().ResourceMetrics().AppendEmpty() diff --git a/storage/remote/otlptranslator/prometheusremotewrite/timeseries.go b/storage/remote/otlptranslator/prometheusremotewrite/timeseries.go new file mode 100644 index 000000000..fe973761a --- /dev/null +++ b/storage/remote/otlptranslator/prometheusremotewrite/timeseries.go @@ -0,0 +1,41 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: +// https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/metrics_to_prw.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package prometheusremotewrite + +import ( + "github.com/prometheus/prometheus/prompb" +) + +// TimeSeries returns a slice of the prompb.TimeSeries that were converted from OTel format. +func (c *PrometheusConverter) TimeSeries() []prompb.TimeSeries { + conflicts := 0 + for _, ts := range c.conflicts { + conflicts += len(ts) + } + allTS := make([]prompb.TimeSeries, 0, len(c.unique)+conflicts) + for _, ts := range c.unique { + allTS = append(allTS, *ts) + } + for _, cTS := range c.conflicts { + for _, ts := range cTS { + allTS = append(allTS, *ts) + } + } + + return allTS +} From f09cf2d9fb54b3b2ae407e58d18588149db3bff8 Mon Sep 17 00:00:00 2001 From: SuperQ Date: Thu, 2 May 2024 00:10:02 +0200 Subject: [PATCH 043/112] Update promu Update promu to the latest release. 
Signed-off-by: SuperQ --- Makefile.common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.common b/Makefile.common index 0acfb9d80..0e9ace29b 100644 --- a/Makefile.common +++ b/Makefile.common @@ -55,7 +55,7 @@ ifneq ($(shell command -v gotestsum 2> /dev/null),) endif endif -PROMU_VERSION ?= 0.15.0 +PROMU_VERSION ?= 0.17.0 PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz SKIP_GOLANGCI_LINT := From ff1bcdb7b9b46a5ce06362233bf3929683c36038 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 23:03:44 +0000 Subject: [PATCH 044/112] build(deps): bump golangci/golangci-lint-action in /scripts Bumps [golangci/golangci-lint-action](https://github.com/golangci/golangci-lint-action) from 4.0.0 to 5.1.0. - [Release notes](https://github.com/golangci/golangci-lint-action/releases) - [Commits](https://github.com/golangci/golangci-lint-action/compare/3cfe3a4abbb849e10058ce4af15d205b6da42804...9d1e0624a798bb64f6c3cea93db47765312263dc) --- updated-dependencies: - dependency-name: golangci/golangci-lint-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- scripts/golangci-lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index a7a40c1be..b36f71c3c 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -33,6 +33,6 @@ jobs: run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' - name: Lint - uses: golangci/golangci-lint-action@3cfe3a4abbb849e10058ce4af15d205b6da42804 # v4.0.0 + uses: golangci/golangci-lint-action@9d1e0624a798bb64f6c3cea93db47765312263dc # v5.1.0 with: version: v1.56.2 From 1821803720418622311f382875d1cc9080dc2879 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 23:05:44 +0000 Subject: [PATCH 045/112] build(deps): bump actions/checkout from 4.1.2 to 4.1.4 Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.2 to 4.1.4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/9bb56186c3b09b4f86b1c65136769dd318469633...0ad4b8fadaa221de15dcec353f45205ec38ea70b) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/buf-lint.yml | 2 +- .github/workflows/buf.yml | 2 +- .github/workflows/ci.yml | 26 ++++++++++----------- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/container_description.yml | 4 ++-- .github/workflows/repo_sync.yml | 2 +- .github/workflows/scorecards.yml | 2 +- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/buf-lint.yml b/.github/workflows/buf-lint.yml index 806a706e1..fe8c4704b 100644 --- a/.github/workflows/buf-lint.yml +++ b/.github/workflows/buf-lint.yml @@ -12,7 +12,7 @@ jobs: name: lint runs-on: ubuntu-latest steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: bufbuild/buf-setup-action@517ee23296d5caf38df31c21945e6a54bbc8a89f # v1.30.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml index 0fbd01f53..2156e8f19 100644 --- a/.github/workflows/buf.yml +++ b/.github/workflows/buf.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest if: github.repository_owner == 'prometheus' steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: bufbuild/buf-setup-action@517ee23296d5caf38df31c21945e6a54bbc8a89f # v1.30.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 41043f6c6..cead7abfd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: # should also be updated. image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/setup_environment - run: make GOOPTS=--tags=stringlabels GO_ONLY=1 SKIP_GOLANGCI_LINT=1 @@ -27,7 +27,7 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/setup_environment - run: go test --tags=dedupelabels ./... @@ -43,7 +43,7 @@ jobs: # The go version in this image should be N-1 wrt test_go. image: quay.io/prometheus/golang-builder:1.21-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - run: make build # Don't run NPM build; don't run race-detector. 
- run: make test GO_ONLY=1 test-flags="" @@ -57,7 +57,7 @@ jobs: image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/setup_environment with: @@ -74,7 +74,7 @@ jobs: name: Go tests on Windows runs-on: windows-latest steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: go-version: 1.22.x @@ -91,7 +91,7 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - run: go install ./cmd/promtool/. - run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest - run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest @@ -114,7 +114,7 @@ jobs: matrix: thread: [ 0, 1, 2 ] steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/build with: @@ -137,7 +137,7 @@ jobs: # Whenever the Go version is updated here, .promu.yml # should also be updated. steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/build with: @@ -148,7 +148,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Install Go uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: @@ -161,7 +161,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Install Go uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: @@ -188,7 +188,7 @@ jobs: needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all] if: github.event_name == 'push' && github.event.ref == 'refs/heads/main' steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/publish_main with: @@ -202,7 +202,7 @@ jobs: needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all] if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.') steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/publish_release with: @@ -217,7 +217,7 @@ jobs: 
needs: [test_ui, codeql] steps: - name: Checkout - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - name: Install nodejs uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2 diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 836fb2568..561c22eab 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -24,7 +24,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Initialize CodeQL uses: github/codeql-action/init@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12 diff --git a/.github/workflows/container_description.yml b/.github/workflows/container_description.yml index d0368eaa1..a7d7e150c 100644 --- a/.github/workflows/container_description.yml +++ b/.github/workflows/container_description.yml @@ -17,7 +17,7 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Set docker hub repo name run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV - name: Push README to Dockerhub @@ -37,7 +37,7 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Set quay.io org name run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV - name: Set quay.io repo name diff --git a/.github/workflows/repo_sync.yml b/.github/workflows/repo_sync.yml index 3458d7b11..f1c7ca5d0 100644 --- a/.github/workflows/repo_sync.yml +++ b/.github/workflows/repo_sync.yml @@ -13,7 +13,7 @@ jobs: container: image: quay.io/prometheus/golang-builder steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - run: ./scripts/sync_repo_files.sh env: GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }} diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 51ff643ab..ea13499d5 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -21,7 +21,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # tag=v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # tag=v4.1.4 with: persist-credentials: false From 781815f064a37391c5ff8327863b1a479f93e891 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 23:05:58 +0000 Subject: [PATCH 046/112] build(deps): bump actions/upload-artifact from 4.3.1 to 4.3.3 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.1 to 4.3.3. 
- [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/5d5d22a31266ced268874388b861e4b58bb5c2f3...65462800fd760344b1a7b4382951275a0abb4808) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/fuzzing.yml | 2 +- .github/workflows/scorecards.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 4c19563eb..dc510e596 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -21,7 +21,7 @@ jobs: fuzz-seconds: 600 dry-run: false - name: Upload Crash - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 51ff643ab..3f03c0c55 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -37,7 +37,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # tag=v4.3.1 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # tag=v4.3.3 with: name: SARIF file path: results.sarif From b90e1dfe6a7131580b289ef89b1daffb0b3447f8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 23:21:27 +0000 Subject: [PATCH 047/112] build(deps): bump github.com/prometheus/prometheus Bumps [github.com/prometheus/prometheus](https://github.com/prometheus/prometheus) from 0.51.1 to 0.51.2. - [Release notes](https://github.com/prometheus/prometheus/releases) - [Changelog](https://github.com/prometheus/prometheus/blob/main/CHANGELOG.md) - [Commits](https://github.com/prometheus/prometheus/compare/v0.51.1...v0.51.2) --- updated-dependencies: - dependency-name: github.com/prometheus/prometheus dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- documentation/examples/remote_storage/go.mod | 2 +- documentation/examples/remote_storage/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/documentation/examples/remote_storage/go.mod b/documentation/examples/remote_storage/go.mod index dff988131..1ab2cec13 100644 --- a/documentation/examples/remote_storage/go.mod +++ b/documentation/examples/remote_storage/go.mod @@ -10,7 +10,7 @@ require ( github.com/influxdata/influxdb v1.11.5 github.com/prometheus/client_golang v1.19.0 github.com/prometheus/common v0.53.0 - github.com/prometheus/prometheus v0.51.1 + github.com/prometheus/prometheus v0.51.2 github.com/stretchr/testify v1.9.0 ) diff --git a/documentation/examples/remote_storage/go.sum b/documentation/examples/remote_storage/go.sum index b145f362f..9506ae638 100644 --- a/documentation/examples/remote_storage/go.sum +++ b/documentation/examples/remote_storage/go.sum @@ -279,8 +279,8 @@ github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4O github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= -github.com/prometheus/prometheus v0.51.1 h1:V2e7x2oiUC0Megp26+xjffxBf9EGkyP1iQuGd4VjUSU= -github.com/prometheus/prometheus v0.51.1/go.mod h1:yv4MwOn3yHMQ6MZGHPg/U7Fcyqf+rxqiZfSur6myVtc= +github.com/prometheus/prometheus v0.51.2 h1:U0faf1nT4CB9DkBW87XLJCBi2s8nwWXdTbyzRUAkX0w= +github.com/prometheus/prometheus v0.51.2/go.mod h1:yv4MwOn3yHMQ6MZGHPg/U7Fcyqf+rxqiZfSur6myVtc= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25 h1:/8rfZAdFfafRXOgz+ZpMZZWZ5pYggCY9t7e/BvjaBHM= From 753fdd513a5470f4aebefbadc0ae213ace8c6b9e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 23:31:06 +0000 Subject: [PATCH 048/112] build(deps): bump github.com/linode/linodego from 1.32.0 to 1.33.0 Bumps [github.com/linode/linodego](https://github.com/linode/linodego) from 1.32.0 to 1.33.0. - [Release notes](https://github.com/linode/linodego/releases) - [Commits](https://github.com/linode/linodego/compare/v1.32.0...v1.33.0) --- updated-dependencies: - dependency-name: github.com/linode/linodego dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 165540e7f..0f279036f 100644 --- a/go.mod +++ b/go.mod @@ -41,7 +41,7 @@ require ( github.com/json-iterator/go v1.1.12 github.com/klauspost/compress v1.17.8 github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b - github.com/linode/linodego v1.32.0 + github.com/linode/linodego v1.33.0 github.com/miekg/dns v1.1.59 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f diff --git a/go.sum b/go.sum index 4b8602b05..457d9a66e 100644 --- a/go.sum +++ b/go.sum @@ -471,8 +471,8 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= -github.com/linode/linodego v1.32.0 h1:OmZzB3iON6uu84VtLFf64uKmAQqJJarvmsVguroioPI= -github.com/linode/linodego v1.32.0/go.mod h1:y8GDP9uLVH4jTB9qyrgw79qfKdYJmNCGUOJmfuiOcmI= +github.com/linode/linodego v1.33.0 h1:cX2FYry7r6CA1ujBMsdqiM4VhvIQtnWsOuVblzfBhCw= +github.com/linode/linodego v1.33.0/go.mod h1:dSJJgIwqZCF5wnpuC6w5cyIbRtcexAm7uVvuJopGB40= github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= From bb513b2722b7c2367b0402c8583c609e538ce9df Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 23:31:24 +0000 Subject: [PATCH 049/112] build(deps): bump google.golang.org/api from 0.174.0 to 0.177.0 Bumps [google.golang.org/api](https://github.com/googleapis/google-api-go-client) from 0.174.0 to 0.177.0. - [Release notes](https://github.com/googleapis/google-api-go-client/releases) - [Changelog](https://github.com/googleapis/google-api-go-client/blob/main/CHANGES.md) - [Commits](https://github.com/googleapis/google-api-go-client/compare/v0.174.0...v0.177.0) --- updated-dependencies: - dependency-name: google.golang.org/api dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- go.mod | 10 +++++----- go.sum | 20 ++++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/go.mod b/go.mod index 165540e7f..cd2b70d2a 100644 --- a/go.mod +++ b/go.mod @@ -80,10 +80,10 @@ require ( golang.org/x/sys v0.19.0 golang.org/x/time v0.5.0 golang.org/x/tools v0.20.0 - google.golang.org/api v0.174.0 + google.golang.org/api v0.177.0 google.golang.org/genproto/googleapis/api v0.0.0-20240415180920-8c6c420018be google.golang.org/grpc v1.63.2 - google.golang.org/protobuf v1.33.0 + google.golang.org/protobuf v1.34.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.29.3 @@ -94,8 +94,8 @@ require ( ) require ( - cloud.google.com/go/auth v0.2.0 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.0 // indirect + cloud.google.com/go/auth v0.3.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect cloud.google.com/go/compute/metadata v0.3.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.2 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect @@ -191,7 +191,7 @@ require ( golang.org/x/mod v0.17.0 // indirect golang.org/x/term v0.19.0 // indirect golang.org/x/text v0.14.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240415180920-8c6c420018be // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240429193739-8cf5692501f6 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gotest.tools/v3 v3.0.3 // indirect diff --git a/go.sum b/go.sum index 4b8602b05..ee7b3acc2 100644 --- a/go.sum +++ b/go.sum @@ -12,10 +12,10 @@ cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bP cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= -cloud.google.com/go/auth v0.2.0 h1:y6oTcpMSbOcXbwYgUUrvI+mrQ2xbrcdpPgtVbCGTLTk= -cloud.google.com/go/auth v0.2.0/go.mod h1:+yb+oy3/P0geX6DLKlqiGHARGR6EX2GRtYCzWOCQSbU= -cloud.google.com/go/auth/oauth2adapt v0.2.0 h1:FR8zevgQwu+8CqiOT5r6xCmJa3pJC/wdXEEPF1OkNhA= -cloud.google.com/go/auth/oauth2adapt v0.2.0/go.mod h1:AfqujpDAlTfLfeCIl/HJZZlIxD8+nJoZ5e0x1IxGq5k= +cloud.google.com/go/auth v0.3.0 h1:PRyzEpGfx/Z9e8+lHsbkoUVXD0gnu4MNmm7Gp8TQNIs= +cloud.google.com/go/auth v0.3.0/go.mod h1:lBv6NKTWp8E3LPzmO1TbiiRKc4drLOfHsgmlH9ogv5w= +cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKFt//oWu7HX4= +cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= @@ -1045,8 +1045,8 @@ google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/ google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.174.0 h1:zB1BWl7ocxfTea2aQ9mgdzXjnfPySllpPOskdnO+q34= -google.golang.org/api v0.174.0/go.mod h1:aC7tB6j0HR1Nl0ni5ghpx6iLasmAX78Zkh/wgxAAjLg= +google.golang.org/api v0.177.0 
h1:8a0p/BbPa65GlqGWtUKxot4p0TV8OGOfyTjtmkXNXmk= +google.golang.org/api v0.177.0/go.mod h1:srbhue4MLjkjbkux5p3dw/ocYOSZTaIEvf7bCOnFQDw= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1085,8 +1085,8 @@ google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7Fc google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto/googleapis/api v0.0.0-20240415180920-8c6c420018be h1:Zz7rLWqp0ApfsR/l7+zSHhY3PMiH2xqgxlfYfAfNpoU= google.golang.org/genproto/googleapis/api v0.0.0-20240415180920-8c6c420018be/go.mod h1:dvdCTIoAGbkWbcIKBniID56/7XHTt6WfxXNMxuziJ+w= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240415180920-8c6c420018be h1:LG9vZxsWGOmUKieR8wPAUR3u3MpnYFQZROPIMaXh7/A= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240415180920-8c6c420018be/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240429193739-8cf5692501f6 h1:DujSIu+2tC9Ht0aPNA7jgj23Iq8Ewi5sgkQ++wdvonE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240429193739-8cf5692501f6/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= @@ -1118,8 +1118,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.0 h1:Qo/qEd2RZPCf2nKuorzksSknv0d3ERwp1vFG38gSmH4= +google.golang.org/protobuf v1.34.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= From d2caf51874aab00aee17e4fd2efd02c879dfbbc6 Mon Sep 17 00:00:00 2001 From: Kushal shukla <85934954+kushalShukla-web@users.noreply.github.com> Date: Fri, 3 May 2024 18:12:39 +0530 Subject: [PATCH 050/112] removed formateoverview section (#13994) docs: Remove outdated information about remote-read API --------- Signed-off-by: kushagra Shukla Signed-off-by: Kushal shukla <85934954+kushalShukla-web@users.noreply.github.com> Signed-off-by: Arthur Silva Sens Co-authored-by: Arthur Silva Sens --- docs/querying/remote_read_api.md | 60 +------------------------------- 1 file changed, 1 insertion(+), 59 deletions(-) diff --git a/docs/querying/remote_read_api.md b/docs/querying/remote_read_api.md index e3dd13306..efbd08e98 100644 --- a/docs/querying/remote_read_api.md +++ b/docs/querying/remote_read_api.md @@ -5,63 +5,7 @@ sort_rank: 7 # Remote Read API 
-This is not currently considered part of the stable API and is subject to change -even between non-major version releases of Prometheus. - -## Format overview - -The API response format is JSON. Every successful API request returns a `2xx` -status code. - -Invalid requests that reach the API handlers return a JSON error object -and one of the following HTTP response codes: - -- `400 Bad Request` when parameters are missing or incorrect. -- `422 Unprocessable Entity` when an expression can't be executed - ([RFC4918](https://tools.ietf.org/html/rfc4918#page-78)). -- `503 Service Unavailable` when queries time out or abort. - -Other non-`2xx` codes may be returned for errors occurring before the API -endpoint is reached. - -An array of warnings may be returned if there are errors that do -not inhibit the request execution. All of the data that was successfully -collected will be returned in the data field. - -The JSON response envelope format is as follows: - -``` -{ - "status": "success" | "error", - "data": , - - // Only set if status is "error". The data field may still hold - // additional data. - "errorType": "", - "error": "", - - // Only if there were warnings while executing the request. - // There will still be data in the data field. - "warnings": [""] -} -``` - -Generic placeholders are defined as follows: - -* ``: Input timestamps may be provided either in -[RFC3339](https://www.ietf.org/rfc/rfc3339.txt) format or as a Unix timestamp -in seconds, with optional decimal places for sub-second precision. Output -timestamps are always represented as Unix timestamps in seconds. -* ``: Prometheus [time series -selectors](basics.md#time-series-selectors) like `http_requests_total` or -`http_requests_total{method=~"(GET|POST)"}` and need to be URL-encoded. -* ``: [Prometheus duration strings](basics.md#time_durations). -For example, `5m` refers to a duration of 5 minutes. -* ``: boolean values (strings `true` and `false`). - -Note: Names of query parameters that may be repeated end with `[]`. - -## Remote Read API +> This is not currently considered part of the stable API and is subject to change even between non-major version releases of Prometheus. This API provides data read functionality from Prometheus. This interface expects [snappy](https://github.com/google/snappy) compression. The API definition is located [here](https://github.com/prometheus/prometheus/blob/master/prompb/remote.proto). @@ -79,5 +23,3 @@ This returns a message that includes a list of raw samples. These streamed chunks utilize an XOR algorithm inspired by the [Gorilla](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf) compression to encode the chunks. However, it provides resolution to the millisecond instead of to the second. - - From c10186eeea1edbe466f8738fc8fcc8871a334f82 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Fri, 3 May 2024 14:23:46 +0100 Subject: [PATCH 051/112] BUGFIX: Mark the rule's restoration process as completed always (#14048) * BUGFIX: Mark the rule's restoration process as completed always In https://github.com/prometheus/prometheus/pull/13980 I introduced a change to reduce the number of queries executed when we restore alert statuses. With this, the querying semantics changed as we now need to go through all series before we enter the alert restoration loop and I missed the fact that exiting early when there are no rules to restore would lead to an incomplete restoration. 
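Reduced to its essentials, the invariant this fix restores looks like the following (a runnable sketch with invented types and a stubbed query helper, not the actual rules/group.go code): every path out of the restore loop, including the early exits, must mark the rule as restored.

```go
package main

import "fmt"

type rule struct {
	name     string
	restored bool
}

func (r *rule) SetRestored(v bool) { r.restored = v }

// querySeriesCount stands in for the real ALERTS_FOR_STATE query; it is a
// hypothetical helper used only to drive the sketch.
func querySeriesCount(r *rule) (int, error) { return 0, nil }

func main() {
	rules := []*rule{{name: "HighLatency"}}
	for _, r := range rules {
		n, err := querySeriesCount(r)
		if err != nil || n == 0 {
			// The fix: mark the rule restored on early-exit paths as well,
			// otherwise it would never start writing ALERTS series.
			r.SetRestored(true)
			continue
		}
		// ... restore the 'for' state from the matching series ...
		r.SetRestored(true)
	}
	fmt.Println(rules[0].restored) // true even though nothing was restored
}
```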
An alert being restored is used as a proxy for "we're now ready to write `ALERTS/ALERTS_FOR_SERIES` metrics" so as a result we weren't writing the series if we didn't restore anything the first time around. --------- Signed-off-by: gotjosh --- rules/group.go | 6 +++++- rules/manager_test.go | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/rules/group.go b/rules/group.go index 3a97c9564..1f4757de3 100644 --- a/rules/group.go +++ b/rules/group.go @@ -672,6 +672,9 @@ func (g *Group) RestoreForState(ts time.Time) { "stage", "Select", "err", err, ) + // Even if we failed to query the `ALERT_FOR_STATE` series, we currently have no way to retry the restore process. + // So the best we can do is mark the rule as restored and let it eventually fire. + alertRule.SetRestored(true) continue } @@ -683,7 +686,8 @@ func (g *Group) RestoreForState(ts time.Time) { // No results for this alert rule. if len(seriesByLabels) == 0 { - level.Debug(g.logger).Log("msg", "Failed to find a series to restore the 'for' state", labels.AlertName, alertRule.Name()) + level.Debug(g.logger).Log("msg", "No series found to restore the 'for' state of the alert rule", labels.AlertName, alertRule.Name()) + alertRule.SetRestored(true) continue } diff --git a/rules/manager_test.go b/rules/manager_test.go index 07159145f..c45569bef 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -482,6 +482,9 @@ func TestForStateRestore(t *testing.T) { return labels.Compare(got[i].Labels, got[j].Labels) < 0 }) + // In all cases, we expect the restoration process to have completed. + require.Truef(t, newRule.Restored(), "expected the rule restoration process to have completed") + // Checking if we have restored it correctly. switch { case tt.noRestore: From 4b7a44c7a0568b4d9b7050b05d483fd54845d201 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Mon, 6 May 2024 11:51:08 +0200 Subject: [PATCH 052/112] Fix `parser.VectorSelector.String()` with empty name matcher (#14015) The check fell into "this matcher equals vector selector's name" case when vector selector doesn't have a name and the matcher is an explicit matcher for an empty __name__ label. To provide some context about why this is important: some downstream projects use the promql.Parse(expr.String()) to clone an expression's AST, and with this bug that matcher disappears in the cloning. Signed-off-by: Oleg Zaytsev --- promql/parser/printer.go | 4 ++-- promql/parser/printer_test.go | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/promql/parser/printer.go b/promql/parser/printer.go index ff171f215..f3bdefdeb 100644 --- a/promql/parser/printer.go +++ b/promql/parser/printer.go @@ -204,8 +204,8 @@ func (node *VectorSelector) String() string { labelStrings = make([]string, 0, len(node.LabelMatchers)-1) } for _, matcher := range node.LabelMatchers { - // Only include the __name__ label if its equality matching and matches the name. - if matcher.Name == labels.MetricName && matcher.Type == labels.MatchEqual && matcher.Value == node.Name { + // Only include the __name__ label if its equality matching and matches the name, but don't skip if it's an explicit empty name matcher. 
+		if matcher.Name == labels.MetricName && matcher.Type == labels.MatchEqual && matcher.Value == node.Name && matcher.Value != "" {
 			continue
 		}
 		labelStrings = append(labelStrings, matcher.String())
diff --git a/promql/parser/printer_test.go b/promql/parser/printer_test.go
index a044b6969..554fb6962 100644
--- a/promql/parser/printer_test.go
+++ b/promql/parser/printer_test.go
@@ -135,6 +135,9 @@ func TestExprString(t *testing.T) {
 		{
 			in: `a[1m] @ end()`,
 		},
+		{
+			in: `{__name__="",a="x"}`,
+		},
 	}
 
 	for _, test := range inputs {
@@ -216,6 +219,16 @@ func TestVectorSelector_String(t *testing.T) {
 			},
 			expected: `{__name__="foobar"}`,
 		},
+		{
+			name: "empty name matcher",
+			vs: VectorSelector{
+				LabelMatchers: []*labels.Matcher{
+					labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, ""),
+					labels.MustNewMatcher(labels.MatchEqual, "a", "x"),
+				},
+			},
+			expected: `{__name__="",a="x"}`,
+		},
 	} {
 		t.Run(tc.name, func(t *testing.T) {
 			require.Equal(t, tc.expected, tc.vs.String())

From 2524a915915d7eb1b1207152d2e0ce5771193404 Mon Sep 17 00:00:00 2001
From: Oleg Zaytsev
Date: Tue, 7 May 2024 16:33:37 +0200
Subject: [PATCH 053/112] Fix FastRegexMatcher matching multibyte runes with . (#14059)

When `zeroOrOneCharacterStringMatcher` was checking the input string, it
assumed that if there is more than one byte, then there is more than one
rune, but that's not necessarily true.

Signed-off-by: Oleg Zaytsev
---
 model/labels/regexp.go      | 25 +++++++++++++++++++++++--
 model/labels/regexp_test.go |  2 +-
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/model/labels/regexp.go b/model/labels/regexp.go
index f35dc76f6..79e340984 100644
--- a/model/labels/regexp.go
+++ b/model/labels/regexp.go
@@ -16,6 +16,7 @@ package labels
 import (
 	"slices"
 	"strings"
+	"unicode/utf8"
 
 	"github.com/grafana/regexp"
 	"github.com/grafana/regexp/syntax"
@@ -827,8 +828,7 @@ type zeroOrOneCharacterStringMatcher struct {
 }
 
 func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool {
-	// Zero or one.
-	if len(s) > 1 {
+	if moreThanOneRune(s) {
 		return false
 	}
 
@@ -840,6 +840,27 @@ func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool {
 	return s[0] != '\n'
 }
 
+// moreThanOneRune returns true if there are more than one runes in the string.
+// It doesn't check whether the string is valid UTF-8.
+// The return value should be always equal to utf8.RuneCountInString(s) > 1,
+// but the function is optimized for the common case where the string prefix is ASCII.
+func moreThanOneRune(s string) bool {
+	// If len(s) is exactly one or zero, there can't be more than one rune.
+	// Exit through this path quickly.
+	if len(s) <= 1 {
+		return false
+	}
+
+	// There's one or more bytes:
+	// If first byte is ASCII then there are multiple runes if there are more bytes after that.
+	if s[0] < utf8.RuneSelf {
+		return len(s) > 1
+	}
+
+	// Less common case: first is a multibyte rune.
+	return utf8.RuneCountInString(s) > 1
+}
+
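A standalone illustration of the byte-versus-rune distinction behind this fix (not part of the patch itself):

```go
package main

import (
	"fmt"
	"unicode/utf8"
)

func main() {
	s := "😀" // one rune, encoded as 4 UTF-8 bytes
	fmt.Println(len(s))                    // 4: len counts bytes
	fmt.Println(utf8.RuneCountInString(s)) // 1: there is only one character
	// The old check `len(s) > 1` therefore rejected "😀" for patterns like
	// `.` or `.?`, even though it is exactly one character.
}
```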
 // trueMatcher is a stringMatcher which matches any string (always returns true).
 type trueMatcher struct{}
 
diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go
index 3a15b52b4..47d3eeb4a 100644
--- a/model/labels/regexp_test.go
+++ b/model/labels/regexp_test.go
@@ -84,7 +84,7 @@ var (
 		"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
 		"FOO", "Foo", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo",
 		"10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40",
-		"foofoo0", "foofoo",
+		"foofoo0", "foofoo", "😀foo0",
 
 		// Values matching / not matching the test regexps on long alternations.
 		"zQPbMkNO", "zQPbMkNo", "jyyfj00j0061", "jyyfj00j006", "jyyfj00j00612", "NNSPdvMi", "NNSPdvMiXXX", "NNSPdvMixxx", "nnSPdvMi", "nnSPdvMiXXX",

From 87427682fd8d059614f71c1bc053c992b11d779d Mon Sep 17 00:00:00 2001
From: Pranshu Srivastava
Date: Tue, 7 May 2024 22:11:59 +0530
Subject: [PATCH 054/112] bugfix: allow opting out of multi-cluster setups

Allow users to opt out of the multi-cluster setup for the Prometheus
dashboard, in environments where it isn't applicable.

Refer: https://github.com/prometheus/prometheus/pull/13180.

Signed-off-by: Pranshu Srivastava
---
 .../prometheus-mixin/config.libsonnet         |  5 ++
 .../prometheus-mixin/dashboards.libsonnet     | 81 +++++++++++++++----
 2 files changed, 69 insertions(+), 17 deletions(-)

diff --git a/documentation/prometheus-mixin/config.libsonnet b/documentation/prometheus-mixin/config.libsonnet
index ab9079a5e..70d46a221 100644
--- a/documentation/prometheus-mixin/config.libsonnet
+++ b/documentation/prometheus-mixin/config.libsonnet
@@ -44,5 +44,10 @@
     // The default refresh time for all dashboards, default to 60s
     refresh: '60s',
   },
+
+  // Opt-out of multi-cluster dashboards by overriding this.
+  showMultiCluster: true,
+  // The cluster label to infer the cluster name from.
+ clusterLabel: 'cluster', }, } diff --git a/documentation/prometheus-mixin/dashboards.libsonnet b/documentation/prometheus-mixin/dashboards.libsonnet index efe53dbac..2bdd168cc 100644 --- a/documentation/prometheus-mixin/dashboards.libsonnet +++ b/documentation/prometheus-mixin/dashboards.libsonnet @@ -10,21 +10,32 @@ local template = grafana.template; { grafanaDashboards+:: { 'prometheus.json': - g.dashboard( + local showMultiCluster = $._config.showMultiCluster; + local dashboard = g.dashboard( '%(prefix)sOverview' % $._config.grafanaPrometheus - ) - .addMultiTemplate('cluster', 'prometheus_build_info{%(prometheusSelector)s}' % $._config, 'cluster') - .addMultiTemplate('job', 'prometheus_build_info{cluster=~"$cluster"}', 'job') - .addMultiTemplate('instance', 'prometheus_build_info{cluster=~"$cluster", job=~"$job"}', 'instance') + ); + local templatedDashboard = if showMultiCluster then + dashboard + .addMultiTemplate('cluster', 'prometheus_build_info{%(prometheusSelector)s}' % $._config, $._config.clusterLabel) + .addMultiTemplate('job', 'prometheus_build_info{cluster=~"$cluster"}', 'job') + .addMultiTemplate('instance', 'prometheus_build_info{cluster=~"$cluster", job=~"$job"}', 'instance') + else + dashboard + .addMultiTemplate('job', 'prometheus_build_info{%(prometheusSelector)s}' % $._config, 'job') + .addMultiTemplate('instance', 'prometheus_build_info{job=~"$job"}', 'instance'); + templatedDashboard .addRow( g.row('Prometheus Stats') .addPanel( g.panel('Prometheus Stats') + - g.tablePanel([ + g.tablePanel(if showMultiCluster then [ 'count by (cluster, job, instance, version) (prometheus_build_info{cluster=~"$cluster", job=~"$job", instance=~"$instance"})', 'max by (cluster, job, instance) (time() - process_start_time_seconds{cluster=~"$cluster", job=~"$job", instance=~"$instance"})', + ] else [ + 'count by (job, instance, version) (prometheus_build_info{job=~"$job", instance=~"$instance"})', + 'max by (job, instance) (time() - process_start_time_seconds{job=~"$job", instance=~"$instance"})', ], { - cluster: { alias: 'Cluster' }, + cluster: { alias: if showMultiCluster then 'Cluster' else '' }, job: { alias: 'Job' }, instance: { alias: 'Instance' }, version: { alias: 'Version' }, @@ -37,12 +48,18 @@ local template = grafana.template; g.row('Discovery') .addPanel( g.panel('Target Sync') + - g.queryPanel('sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[5m])) by (cluster, job, scrape_job, instance) * 1e3', '{{cluster}}:{{job}}:{{instance}}:{{scrape_job}}') + + g.queryPanel(if showMultiCluster then 'sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[5m])) by (cluster, job, scrape_job, instance) * 1e3' + else 'sum(rate(prometheus_target_sync_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m])) by (scrape_job) * 1e3', + if showMultiCluster then '{{cluster}}:{{job}}:{{instance}}:{{scrape_job}}' + else '{{scrape_job}}') + { yaxes: g.yaxes('ms') } ) .addPanel( g.panel('Targets') + - g.queryPanel('sum by (cluster, job, instance) (prometheus_sd_discovered_targets{cluster=~"$cluster", job=~"$job",instance=~"$instance"})', '{{cluster}}:{{job}}:{{instance}}') + + g.queryPanel(if showMultiCluster then 'sum by (cluster, job, instance) (prometheus_sd_discovered_targets{cluster=~"$cluster", job=~"$job",instance=~"$instance"})' + else 'sum(prometheus_sd_discovered_targets{job=~"$job",instance=~"$instance"})', + if showMultiCluster then '{{cluster}}:{{job}}:{{instance}}' + else 
'Targets') + g.stack ) ) @@ -50,29 +67,47 @@ local template = grafana.template; g.row('Retrieval') .addPanel( g.panel('Average Scrape Interval Duration') + - g.queryPanel('rate(prometheus_target_interval_length_seconds_sum{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) * 1e3', '{{cluster}}:{{job}}:{{instance}} {{interval}} configured') + + g.queryPanel(if showMultiCluster then 'rate(prometheus_target_interval_length_seconds_sum{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) * 1e3' + else 'rate(prometheus_target_interval_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~"$job",instance=~"$instance"}[5m]) * 1e3', + if showMultiCluster then '{{cluster}}:{{job}}:{{instance}} {{interval}} configured' + else '{{interval}} configured') + { yaxes: g.yaxes('ms') } ) .addPanel( g.panel('Scrape failures') + - g.queryPanel([ + g.queryPanel(if showMultiCluster then [ 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', - ], [ + ] else [ + 'sum by (job) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total[1m]))', + 'sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))', + 'sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))', + 'sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))', + 'sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))', + ], if showMultiCluster then [ 'exceeded body size limit: {{cluster}} {{job}} {{instance}}', 'exceeded sample limit: {{cluster}} {{job}} {{instance}}', 'duplicate timestamp: {{cluster}} {{job}} {{instance}}', 'out of bounds: {{cluster}} {{job}} {{instance}}', 'out of order: {{cluster}} {{job}} {{instance}}', + ] else [ + 'exceeded body size limit: {{job}}', + 'exceeded sample limit: {{job}}', + 'duplicate timestamp: {{job}}', + 'out of bounds: {{job}}', + 'out of order: {{job}}', ]) + g.stack ) .addPanel( g.panel('Appended Samples') + - g.queryPanel('rate(prometheus_tsdb_head_samples_appended_total{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m])', '{{cluster}} {{job}} {{instance}}') + + g.queryPanel(if showMultiCluster then 'rate(prometheus_tsdb_head_samples_appended_total{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m])' + else 'rate(prometheus_tsdb_head_samples_appended_total{job=~"$job",instance=~"$instance"}[5m])', + if showMultiCluster then '{{cluster}} {{job}} {{instance}}' + else '{{job}} {{instance}}') + g.stack ) ) @@ -80,12 +115,18 @@ local template = grafana.template; 
g.row('Storage') .addPanel( g.panel('Head Series') + - g.queryPanel('prometheus_tsdb_head_series{cluster=~"$cluster",job=~"$job",instance=~"$instance"}', '{{cluster}} {{job}} {{instance}} head series') + + g.queryPanel(if showMultiCluster then 'prometheus_tsdb_head_series{cluster=~"$cluster",job=~"$job",instance=~"$instance"}' + else 'prometheus_tsdb_head_series{job=~"$job",instance=~"$instance"}', + if showMultiCluster then '{{cluster}} {{job}} {{instance}} head series' + else '{{job}} {{instance}} head series') + g.stack ) .addPanel( g.panel('Head Chunks') + - g.queryPanel('prometheus_tsdb_head_chunks{cluster=~"$cluster",job=~"$job",instance=~"$instance"}', '{{cluster}} {{job}} {{instance}} head chunks') + + g.queryPanel(if showMultiCluster then 'prometheus_tsdb_head_chunks{cluster=~"$cluster",job=~"$job",instance=~"$instance"}' + else 'prometheus_tsdb_head_chunks{job=~"$job",instance=~"$instance"}', + if showMultiCluster then '{{cluster}} {{job}} {{instance}} head chunks' + else '{{job}} {{instance}} head chunks') + g.stack ) ) @@ -93,12 +134,18 @@ local template = grafana.template; g.row('Query') .addPanel( g.panel('Query Rate') + - g.queryPanel('rate(prometheus_engine_query_duration_seconds_count{cluster=~"$cluster",job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])', '{{cluster}} {{job}} {{instance}}') + + g.queryPanel(if showMultiCluster then 'rate(prometheus_engine_query_duration_seconds_count{cluster=~"$cluster",job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])' + else 'rate(prometheus_engine_query_duration_seconds_count{job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])', + if showMultiCluster then '{{cluster}} {{job}} {{instance}}' + else '{{job}} {{instance}}') + g.stack, ) .addPanel( g.panel('Stage Duration') + - g.queryPanel('max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",cluster=~"$cluster", job=~"$job",instance=~"$instance"}) * 1e3', '{{slice}}') + + g.queryPanel(if showMultiCluster then 'max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",cluster=~"$cluster", job=~"$job",instance=~"$instance"}) * 1e3' + else 'max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",job=~"$job",instance=~"$instance"}) * 1e3', + if showMultiCluster then '{{slice}}' + else '{{slice}}') + { yaxes: g.yaxes('ms') } + g.stack, ) From a25160e6a45bd030702f6a1c747b9a3b64e2b372 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 8 May 2024 11:39:44 +0200 Subject: [PATCH 055/112] [REFACTOR] PromQL: simplify rangeEvalTimestampFunctionOverVectorSelector (#14021) The function `rangeEvalTimestampFunctionOverVectorSelector` appeared to be checking histogram size, but the value it used was always 0 due to subtle variable shadowing. However, we don't need to pass sample values to the `timestamp` function, since the latter only cares about timestamps. This also affects peak sample count in statistics, since we are no longer copying histogram samples.
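The shadowing in the removed lines reduces to this pattern (a simplified sketch of the old code; see the diff below):

    histSize := 0
    if h != nil {
        histSize := h.Size() / 16 // ':=' declares a second, inner histSize; the outer one stays 0
        ev.currentSamples += histSize
    }
    // The outer histSize is still 0 here, so this always incremented by 1:
    ev.samplesStats.IncrementSamplesAtTimestamp(enh.Ts, int64(1+histSize))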
Signed-off-by: Arve Knudsen --- promql/engine.go | 32 ++++++++++++++------------------ promql/engine_test.go | 6 +++--- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index b8a8ea095..4bd9d25d7 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -2024,25 +2024,21 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(vs *parser.Vec vec := make(Vector, 0, len(vs.Series)) for i, s := range vs.Series { it := seriesIterators[i] - t, f, h, ok := ev.vectorSelectorSingle(it, vs, enh.Ts) - if ok { - vec = append(vec, Sample{ - Metric: s.Labels(), - T: t, - F: f, - H: h, - }) - histSize := 0 - if h != nil { - histSize := h.Size() / 16 // 16 bytes per sample. - ev.currentSamples += histSize - } - ev.currentSamples++ + t, _, _, ok := ev.vectorSelectorSingle(it, vs, enh.Ts) + if !ok { + continue + } - ev.samplesStats.IncrementSamplesAtTimestamp(enh.Ts, int64(1+histSize)) - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } + // Note that we ignore the sample values because the call only cares about the timestamp. + vec = append(vec, Sample{ + Metric: s.Labels(), + T: t, + }) + + ev.currentSamples++ + ev.samplesStats.IncrementSamplesAtTimestamp(enh.Ts, 1) + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) } } ev.samplesStats.UpdatePeak(ev.currentSamples) diff --git a/promql/engine_test.go b/promql/engine_test.go index 0202c15ae..09992c672 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -818,8 +818,8 @@ load 10s { Query: "timestamp(metricWith1HistogramEvery10Seconds)", Start: time.Unix(21, 0), - PeakSamples: 13, // histogram size 12 + 1 extra because of timestamp - TotalSamples: 1, // 1 float sample (because of timestamp) / 10 seconds + PeakSamples: 2, + TotalSamples: 1, // 1 float sample (because of timestamp) / 10 seconds TotalSamplesPerStep: stats.TotalSamplesPerStep{ 21000: 1, }, @@ -1116,7 +1116,7 @@ load 10s Start: time.Unix(201, 0), End: time.Unix(220, 0), Interval: 5 * time.Second, - PeakSamples: 16, + PeakSamples: 5, TotalSamples: 4, // 1 sample per query * 4 steps TotalSamplesPerStep: stats.TotalSamplesPerStep{ 201000: 1, From 108a6bc9f675eb2645d0bf5f9ee1fa2741c32c74 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 17 Jan 2024 18:28:06 +0100 Subject: [PATCH 056/112] tsdb/chunkenc.Pool: Refactor Get and Put Signed-off-by: Arve Knudsen --- tsdb/chunkenc/bstream.go | 6 +++ tsdb/chunkenc/bstream_test.go | 13 +++++ tsdb/chunkenc/chunk.go | 72 ++++++++++--------------- tsdb/chunkenc/chunk_test.go | 90 ++++++++++++++++++++++++++++++++ tsdb/chunkenc/float_histogram.go | 4 ++ tsdb/chunkenc/histogram.go | 4 ++ tsdb/chunkenc/xor.go | 5 +- 7 files changed, 150 insertions(+), 44 deletions(-) diff --git a/tsdb/chunkenc/bstream.go b/tsdb/chunkenc/bstream.go index 7b17f4686..8cc59f3ea 100644 --- a/tsdb/chunkenc/bstream.go +++ b/tsdb/chunkenc/bstream.go @@ -52,6 +52,12 @@ type bstream struct { count uint8 // How many right-most bits are available for writing in the current byte (the last byte of the stream). } +// Reset resets b around stream.
+func (b *bstream) Reset(stream []byte) { + b.stream = stream + b.count = 0 +} + func (b *bstream) bytes() []byte { return b.stream } diff --git a/tsdb/chunkenc/bstream_test.go b/tsdb/chunkenc/bstream_test.go index 66a54bc8e..8ac45ef0b 100644 --- a/tsdb/chunkenc/bstream_test.go +++ b/tsdb/chunkenc/bstream_test.go @@ -19,6 +19,19 @@ import ( "github.com/stretchr/testify/require" ) +func TestBstream_Reset(t *testing.T) { + bs := bstream{ + stream: []byte("test"), + count: 10, + } + bs.Reset([]byte("was reset")) + + require.Equal(t, bstream{ + stream: []byte("was reset"), + count: 0, + }, bs) +} + func TestBstreamReader(t *testing.T) { // Write to the bit stream. w := bstream{} diff --git a/tsdb/chunkenc/chunk.go b/tsdb/chunkenc/chunk.go index 21c41257b..1421f3b39 100644 --- a/tsdb/chunkenc/chunk.go +++ b/tsdb/chunkenc/chunk.go @@ -87,6 +87,9 @@ type Chunk interface { // There's no strong guarantee that no samples will be appended once // Compact() is called. Implementing this function is optional. Compact() + + // Reset resets the chunk given stream. + Reset(stream []byte) } type Iterable interface { @@ -303,64 +306,47 @@ func NewPool() Pool { } func (p *pool) Get(e Encoding, b []byte) (Chunk, error) { + var c Chunk switch e { case EncXOR: - c := p.xor.Get().(*XORChunk) - c.b.stream = b - c.b.count = 0 - return c, nil + c = p.xor.Get().(*XORChunk) case EncHistogram: - c := p.histogram.Get().(*HistogramChunk) - c.b.stream = b - c.b.count = 0 - return c, nil + c = p.histogram.Get().(*HistogramChunk) case EncFloatHistogram: - c := p.floatHistogram.Get().(*FloatHistogramChunk) - c.b.stream = b - c.b.count = 0 - return c, nil + c = p.floatHistogram.Get().(*FloatHistogramChunk) + default: + return nil, fmt.Errorf("invalid chunk encoding %q", e) } - return nil, fmt.Errorf("invalid chunk encoding %q", e) + + c.Reset(b) + return c, nil } func (p *pool) Put(c Chunk) error { + var sp *sync.Pool + var ok bool switch c.Encoding() { case EncXOR: - xc, ok := c.(*XORChunk) - // This may happen often with wrapped chunks. Nothing we can really do about - // it but returning an error would cause a lot of allocations again. Thus, - // we just skip it. - if !ok { - return nil - } - xc.b.stream = nil - xc.b.count = 0 - p.xor.Put(c) + _, ok = c.(*XORChunk) + sp = &p.xor case EncHistogram: - sh, ok := c.(*HistogramChunk) - // This may happen often with wrapped chunks. Nothing we can really do about - // it but returning an error would cause a lot of allocations again. Thus, - // we just skip it. - if !ok { - return nil - } - sh.b.stream = nil - sh.b.count = 0 - p.histogram.Put(c) + _, ok = c.(*HistogramChunk) + sp = &p.histogram case EncFloatHistogram: - sh, ok := c.(*FloatHistogramChunk) - // This may happen often with wrapped chunks. Nothing we can really do about - // it but returning an error would cause a lot of allocations again. Thus, - // we just skip it. - if !ok { - return nil - } - sh.b.stream = nil - sh.b.count = 0 - p.floatHistogram.Put(c) + _, ok = c.(*FloatHistogramChunk) + sp = &p.floatHistogram default: return fmt.Errorf("invalid chunk encoding %q", c.Encoding()) } + if !ok { + // This may happen often with wrapped chunks. Nothing we can really do about + // it but returning an error would cause a lot of allocations again. Thus, + // we just skip it. 
+ return nil + } + + c.Reset(nil) + sp.Put(c) return nil } diff --git a/tsdb/chunkenc/chunk_test.go b/tsdb/chunkenc/chunk_test.go index 9db1bf364..b72492a08 100644 --- a/tsdb/chunkenc/chunk_test.go +++ b/tsdb/chunkenc/chunk_test.go @@ -110,6 +110,96 @@ func testChunk(t *testing.T, c Chunk) { require.Equal(t, ValNone, it3.Seek(exp[len(exp)-1].t+1)) } +func TestPool(t *testing.T) { + p := NewPool() + for _, tc := range []struct { + name string + encoding Encoding + expErr error + }{ + { + name: "xor", + encoding: EncXOR, + }, + { + name: "histogram", + encoding: EncHistogram, + }, + { + name: "float histogram", + encoding: EncFloatHistogram, + }, + { + name: "invalid encoding", + encoding: EncNone, + expErr: fmt.Errorf(`invalid chunk encoding "none"`), + }, + } { + t.Run(tc.name, func(t *testing.T) { + c, err := p.Get(tc.encoding, []byte("test")) + if tc.expErr != nil { + require.EqualError(t, err, tc.expErr.Error()) + return + } + + require.NoError(t, err) + + var b *bstream + switch tc.encoding { + case EncHistogram: + b = &c.(*HistogramChunk).b + case EncFloatHistogram: + b = &c.(*FloatHistogramChunk).b + default: + b = &c.(*XORChunk).b + } + + require.Equal(t, &bstream{ + stream: []byte("test"), + count: 0, + }, b) + + b.count = 1 + require.NoError(t, p.Put(c)) + require.Equal(t, &bstream{ + stream: nil, + count: 0, + }, b) + }) + } + + t.Run("put bad chunk wrapper", func(t *testing.T) { + // When a wrapping chunk poses as an encoding it can't be converted to, Put should skip it. + c := fakeChunk{ + encoding: EncXOR, + t: t, + } + require.NoError(t, p.Put(c)) + }) + t.Run("put invalid encoding", func(t *testing.T) { + c := fakeChunk{ + encoding: EncNone, + t: t, + } + require.EqualError(t, p.Put(c), `invalid chunk encoding "none"`) + }) +} + +type fakeChunk struct { + Chunk + + encoding Encoding + t *testing.T +} + +func (c fakeChunk) Encoding() Encoding { + return c.encoding +} + +func (c fakeChunk) Reset([]byte) { + c.t.Fatal("Reset should not be called") +} + func benchmarkIterator(b *testing.B, newChunk func() Chunk) { const samplesPerChunk = 250 var ( diff --git a/tsdb/chunkenc/float_histogram.go b/tsdb/chunkenc/float_histogram.go index 88d189254..1eed46ca8 100644 --- a/tsdb/chunkenc/float_histogram.go +++ b/tsdb/chunkenc/float_histogram.go @@ -44,6 +44,10 @@ func NewFloatHistogramChunk() *FloatHistogramChunk { return &FloatHistogramChunk{b: bstream{stream: b, count: 0}} } +func (c *FloatHistogramChunk) Reset(stream []byte) { + c.b.Reset(stream) +} + // xorValue holds all the necessary information to encode // and decode XOR encoded float64 values. type xorValue struct { diff --git a/tsdb/chunkenc/histogram.go b/tsdb/chunkenc/histogram.go index cb09eda26..e12aec4dc 100644 --- a/tsdb/chunkenc/histogram.go +++ b/tsdb/chunkenc/histogram.go @@ -45,6 +45,10 @@ func NewHistogramChunk() *HistogramChunk { return &HistogramChunk{b: bstream{stream: b, count: 0}} } +func (c *HistogramChunk) Reset(stream []byte) { + c.b.Reset(stream) +} + // Encoding returns the encoding type. func (c *HistogramChunk) Encoding() Encoding { return EncHistogram diff --git a/tsdb/chunkenc/xor.go b/tsdb/chunkenc/xor.go index 07b923831..9430de396 100644 --- a/tsdb/chunkenc/xor.go +++ b/tsdb/chunkenc/xor.go @@ -66,6 +66,10 @@ func NewXORChunk() *XORChunk { return &XORChunk{b: bstream{stream: b, count: 0}} } +func (c *XORChunk) Reset(stream []byte) { + c.b.Reset(stream) +} + // Encoding returns the encoding type. 
func (c *XORChunk) Encoding() Encoding { return EncXOR @@ -171,7 +175,6 @@ func (a *xorAppender) Append(t int64, v float64) { } a.writeVDelta(v) - default: tDelta = uint64(t - a.t) dod := int64(tDelta - a.tDelta) From a1af3c27d4b15fa1de5f829f8f9e4ecb20c2eff6 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 29 Apr 2024 12:37:32 +0100 Subject: [PATCH 057/112] refactor: extract almost.Equal() to new package To avoid a circular reference between promql and promqltest. Signed-off-by: Bryan Boreham --- promql/engine_test.go | 3 ++- promql/quantile.go | 3 ++- promql/test.go | 33 ++++----------------------------- util/almost/almost.go | 41 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 31 deletions(-) create mode 100644 util/almost/almost.go diff --git a/promql/engine_test.go b/promql/engine_test.go index 0202c15ae..36f36807a 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -35,6 +35,7 @@ import ( "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/tsdbutil" + "github.com/prometheus/prometheus/util/almost" "github.com/prometheus/prometheus/util/annotations" "github.com/prometheus/prometheus/util/stats" "github.com/prometheus/prometheus/util/teststorage" @@ -3873,7 +3874,7 @@ func TestNativeHistogram_HistogramQuantile(t *testing.T) { require.Len(t, vector, 1) require.Nil(t, vector[0].H) - require.True(t, almostEqual(sc.value, vector[0].F, defaultEpsilon)) + require.True(t, almost.Equal(sc.value, vector[0].F, defaultEpsilon)) }) } idx++ diff --git a/promql/quantile.go b/promql/quantile.go index 6a225afb1..d4bc9ee6e 100644 --- a/promql/quantile.go +++ b/promql/quantile.go @@ -20,6 +20,7 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/util/almost" ) // smallDeltaTolerance is the threshold for relative deltas between classic @@ -397,7 +398,7 @@ func ensureMonotonicAndIgnoreSmallDeltas(buckets buckets, tolerance float64) (bo // No correction needed if the counts are identical between buckets. continue } - if almostEqual(prev, curr, tolerance) { + if almost.Equal(prev, curr, tolerance) { // Silently correct numerically insignificant differences from floating // point precision errors, regardless of direction. // Do not update the 'prev' value as we are ignoring the difference. diff --git a/promql/test.go b/promql/test.go index 1cdfe8d31..b982c33c3 100644 --- a/promql/test.go +++ b/promql/test.go @@ -19,7 +19,6 @@ import ( "errors" "fmt" "io/fs" - "math" "strconv" "strings" "testing" @@ -36,13 +35,12 @@ import ( "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/almost" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" ) var ( - minNormal = math.Float64frombits(0x0010000000000000) // The smallest positive normal value of type float64. 
- patSpace = regexp.MustCompile("[\t ]+") patLoad = regexp.MustCompile(`^load\s+(.+?)$`) patEvalInstant = regexp.MustCompile(`^eval(?:_(fail|ordered))?\s+instant\s+(?:at\s+(.+?))?\s+(.+)$`) @@ -551,7 +549,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { return fmt.Errorf("expected float value at index %v for %s to have timestamp %v, but it had timestamp %v (result has %s)", i, ev.metrics[hash], expected.T, actual.T, formatSeriesResult(s)) } - if !almostEqual(actual.F, expected.F, defaultEpsilon) { + if !almost.Equal(actual.F, expected.F, defaultEpsilon) { return fmt.Errorf("expected float value at index %v (t=%v) for %s to be %v, but got %v (result has %s)", i, actual.T, ev.metrics[hash], expected.F, actual.F, formatSeriesResult(s)) } } @@ -601,7 +599,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { if expH != nil && !expH.Compact(0).Equals(v.H) { return fmt.Errorf("expected %v for %s but got %s", HistogramTestExpression(expH), v.Metric, HistogramTestExpression(v.H)) } - if !almostEqual(exp0.Value, v.F, defaultEpsilon) { + if !almost.Equal(exp0.Value, v.F, defaultEpsilon) { return fmt.Errorf("expected %v for %s but got %v", exp0.Value, v.Metric, v.F) } @@ -621,7 +619,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { if exp0.Histogram != nil { return fmt.Errorf("expected Histogram %v but got scalar %s", exp0.Histogram.TestExpression(), val.String()) } - if !almostEqual(exp0.Value, val.V, defaultEpsilon) { + if !almost.Equal(exp0.Value, val.V, defaultEpsilon) { return fmt.Errorf("expected Scalar %v but got %v", val.V, exp0.Value) } @@ -894,29 +892,6 @@ func (t *test) clear() { t.context, t.cancelCtx = context.WithCancel(context.Background()) } -// almostEqual returns true if a and b differ by less than their sum -// multiplied by epsilon. -func almostEqual(a, b, epsilon float64) bool { - // NaN has no equality but for testing we still want to know whether both values - // are NaN. - if math.IsNaN(a) && math.IsNaN(b) { - return true - } - - // Cf. http://floating-point-gui.de/errors/comparison/ - if a == b { - return true - } - - absSum := math.Abs(a) + math.Abs(b) - diff := math.Abs(a - b) - - if a == 0 || b == 0 || absSum < minNormal { - return diff < epsilon*minNormal - } - return diff/math.Min(absSum, math.MaxFloat64) < epsilon -} - func parseNumber(s string) (float64, error) { n, err := strconv.ParseInt(s, 0, 64) f := float64(n) diff --git a/util/almost/almost.go b/util/almost/almost.go new file mode 100644 index 000000000..34f1290a5 --- /dev/null +++ b/util/almost/almost.go @@ -0,0 +1,41 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package almost + +import "math" + +var minNormal = math.Float64frombits(0x0010000000000000) // The smallest positive normal value of type float64. + +// Equal returns true if a and b differ by less than their sum +// multiplied by epsilon. 
+func Equal(a, b, epsilon float64) bool { + // NaN has no equality but for testing we still want to know whether both values + // are NaN. + if math.IsNaN(a) && math.IsNaN(b) { + return true + } + + // Cf. http://floating-point-gui.de/errors/comparison/ + if a == b { + return true + } + + absSum := math.Abs(a) + math.Abs(b) + diff := math.Abs(a - b) + + if a == 0 || b == 0 || absSum < minNormal { + return diff < epsilon*minNormal + } + return diff/math.Min(absSum, math.MaxFloat64) < epsilon +} From 2b0c87b1b6ee494c75448f23e6465c8cf20acda0 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Wed, 8 May 2024 11:47:03 +0100 Subject: [PATCH 058/112] test: turn TestKahanSum into scripted test This saves having a function solely to call kahanSumInc. Signed-off-by: Bryan Boreham --- promql/functions.go | 9 --------- promql/functions_test.go | 7 ------- promql/testdata/functions.test | 8 ++++++++ 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/promql/functions.go b/promql/functions.go index 2e15a1467..9b3be2287 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -948,15 +948,6 @@ func funcTimestamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHe return enh.Out, nil } -func kahanSum(samples []float64) float64 { - var sum, c float64 - - for _, v := range samples { - sum, c = kahanSumInc(v, sum, c) - } - return sum + c -} - func kahanSumInc(inc, sum, c float64) (newSum, newC float64) { t := sum + inc // Using Neumaier improvement, swap if next term larger than sum. diff --git a/promql/functions_test.go b/promql/functions_test.go index 6d5c3784e..08e4900f5 100644 --- a/promql/functions_test.go +++ b/promql/functions_test.go @@ -15,7 +15,6 @@ package promql import ( "context" - "math" "testing" "time" @@ -80,9 +79,3 @@ func TestFunctionList(t *testing.T) { require.True(t, ok, "function %s exists in parser package, but not in promql package", i) } } - -func TestKahanSum(t *testing.T) { - vals := []float64{1.0, math.Pow(10, 100), 1.0, -1 * math.Pow(10, 100)} - expected := 2.0 - require.Equal(t, expected, kahanSum(vals)) -} diff --git a/promql/testdata/functions.test b/promql/testdata/functions.test index e01c75a7f..2c198374a 100644 --- a/promql/testdata/functions.test +++ b/promql/testdata/functions.test @@ -764,6 +764,14 @@ eval instant at 1m avg_over_time(metric10[1m]) eval instant at 1m sum_over_time(metric10[1m])/count_over_time(metric10[1m]) {} 0 +# Test if very big intermediate values cause loss of detail. +clear +load 10s + metric 1 1e100 1 -1e100 + +eval instant at 1m sum_over_time(metric[1m]) + {} 2 + # Tests for stddev_over_time and stdvar_over_time. clear load 10s From a6e427660aa7e6b360049f8bf6990c3bdb80a9aa Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 4 May 2024 14:22:28 +0100 Subject: [PATCH 059/112] test: check for @-modifier without using engine internals Signed-off-by: Bryan Boreham --- promql/test.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/promql/test.go b/promql/test.go index b982c33c3..2cfcfce6f 100644 --- a/promql/test.go +++ b/promql/test.go @@ -676,8 +676,7 @@ func atModifierTestCases(exprStr string, evalTime time.Time) ([]atModifierTestCa // If there is a subquery, then the selectors inside it don't get the @ timestamp. // If any selector already has the @ timestamp set, then it is untouched. 
parser.Inspect(expr, func(node parser.Node, path []parser.Node) error { - _, _, subqTs := subqueryTimes(path) - if subqTs != nil { + if hasAtModifier(path) { // There is a subquery with timestamp in the path, // hence don't change any timestamps further. return nil @@ -727,6 +726,17 @@ func atModifierTestCases(exprStr string, evalTime time.Time) ([]atModifierTestCa return testCases, nil } +func hasAtModifier(path []parser.Node) bool { + for _, node := range path { + if n, ok := node.(*parser.SubqueryExpr); ok { + if n.Timestamp != nil { + return true + } + } + } + return false +} + // exec processes a single step of the test. func (t *test) exec(tc testCommand, engine QueryEngine) error { switch cmd := tc.(type) { From 11b27d5d229d7ee815bd7cfc77c6682b1f3eed0c Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 29 Apr 2024 10:44:53 +0100 Subject: [PATCH 060/112] test: move test files into new promqltest package So that promql package does not bring in test-only dependencies. Signed-off-by: Bryan Boreham --- promql/{ => promqltest}/test.go | 0 promql/{ => promqltest}/testdata/aggregators.test | 0 promql/{ => promqltest}/testdata/at_modifier.test | 0 promql/{ => promqltest}/testdata/collision.test | 0 promql/{ => promqltest}/testdata/functions.test | 0 promql/{ => promqltest}/testdata/histograms.test | 0 promql/{ => promqltest}/testdata/literals.test | 0 promql/{ => promqltest}/testdata/native_histograms.test | 0 promql/{ => promqltest}/testdata/operators.test | 0 promql/{ => promqltest}/testdata/selectors.test | 0 promql/{ => promqltest}/testdata/staleness.test | 0 promql/{ => promqltest}/testdata/subquery.test | 0 promql/{ => promqltest}/testdata/trig_functions.test | 0 13 files changed, 0 insertions(+), 0 deletions(-) rename promql/{ => promqltest}/test.go (100%) rename promql/{ => promqltest}/testdata/aggregators.test (100%) rename promql/{ => promqltest}/testdata/at_modifier.test (100%) rename promql/{ => promqltest}/testdata/collision.test (100%) rename promql/{ => promqltest}/testdata/functions.test (100%) rename promql/{ => promqltest}/testdata/histograms.test (100%) rename promql/{ => promqltest}/testdata/literals.test (100%) rename promql/{ => promqltest}/testdata/native_histograms.test (100%) rename promql/{ => promqltest}/testdata/operators.test (100%) rename promql/{ => promqltest}/testdata/selectors.test (100%) rename promql/{ => promqltest}/testdata/staleness.test (100%) rename promql/{ => promqltest}/testdata/subquery.test (100%) rename promql/{ => promqltest}/testdata/trig_functions.test (100%) diff --git a/promql/test.go b/promql/promqltest/test.go similarity index 100% rename from promql/test.go rename to promql/promqltest/test.go diff --git a/promql/testdata/aggregators.test b/promql/promqltest/testdata/aggregators.test similarity index 100% rename from promql/testdata/aggregators.test rename to promql/promqltest/testdata/aggregators.test diff --git a/promql/testdata/at_modifier.test b/promql/promqltest/testdata/at_modifier.test similarity index 100% rename from promql/testdata/at_modifier.test rename to promql/promqltest/testdata/at_modifier.test diff --git a/promql/testdata/collision.test b/promql/promqltest/testdata/collision.test similarity index 100% rename from promql/testdata/collision.test rename to promql/promqltest/testdata/collision.test diff --git a/promql/testdata/functions.test b/promql/promqltest/testdata/functions.test similarity index 100% rename from promql/testdata/functions.test rename to promql/promqltest/testdata/functions.test diff --git 
a/promql/testdata/histograms.test b/promql/promqltest/testdata/histograms.test similarity index 100% rename from promql/testdata/histograms.test rename to promql/promqltest/testdata/histograms.test diff --git a/promql/testdata/literals.test b/promql/promqltest/testdata/literals.test similarity index 100% rename from promql/testdata/literals.test rename to promql/promqltest/testdata/literals.test diff --git a/promql/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test similarity index 100% rename from promql/testdata/native_histograms.test rename to promql/promqltest/testdata/native_histograms.test diff --git a/promql/testdata/operators.test b/promql/promqltest/testdata/operators.test similarity index 100% rename from promql/testdata/operators.test rename to promql/promqltest/testdata/operators.test diff --git a/promql/testdata/selectors.test b/promql/promqltest/testdata/selectors.test similarity index 100% rename from promql/testdata/selectors.test rename to promql/promqltest/testdata/selectors.test diff --git a/promql/testdata/staleness.test b/promql/promqltest/testdata/staleness.test similarity index 100% rename from promql/testdata/staleness.test rename to promql/promqltest/testdata/staleness.test diff --git a/promql/testdata/subquery.test b/promql/promqltest/testdata/subquery.test similarity index 100% rename from promql/testdata/subquery.test rename to promql/promqltest/testdata/subquery.test diff --git a/promql/testdata/trig_functions.test b/promql/promqltest/testdata/trig_functions.test similarity index 100% rename from promql/testdata/trig_functions.test rename to promql/promqltest/testdata/trig_functions.test From ccf73266f03a7de3fbd7d150ee80ee9521ba17a1 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 29 Apr 2024 13:02:59 +0100 Subject: [PATCH 061/112] test: move promqltest tests together with the implementation Signed-off-by: Bryan Boreham --- promql/{ => promqltest}/test_test.go | 67 ++++++++++++++-------------- 1 file changed, 34 insertions(+), 33 deletions(-) rename promql/{ => promqltest}/test_test.go (91%) diff --git a/promql/test_test.go b/promql/promqltest/test_test.go similarity index 91% rename from promql/test_test.go rename to promql/promqltest/test_test.go index a5b24ac69..bad3e2f3b 100644 --- a/promql/test_test.go +++ b/promql/promqltest/test_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package promql +package promqltest import ( "math" @@ -21,14 +21,15 @@ import ( "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/tsdb/chunkenc" ) func TestLazyLoader_WithSamplesTill(t *testing.T) { type testCase struct { ts time.Time - series []Series // Each series is checked separately. Need not mention all series here. - checkOnlyError bool // If this is true, series is not checked. + series []promql.Series // Each series is checked separately. Need not mention all series here. + checkOnlyError bool // If this is true, series is not checked. 
} cases := []struct { @@ -44,10 +45,10 @@ func TestLazyLoader_WithSamplesTill(t *testing.T) { testCases: []testCase{ { ts: time.Unix(40, 0), - series: []Series{ + series: []promql.Series{ { Metric: labels.FromStrings("__name__", "metric1"), - Floats: []FPoint{ + Floats: []promql.FPoint{ {0, 1}, {10000, 2}, {20000, 3}, {30000, 4}, {40000, 5}, }, }, @@ -55,10 +56,10 @@ func TestLazyLoader_WithSamplesTill(t *testing.T) { }, { ts: time.Unix(10, 0), - series: []Series{ + series: []promql.Series{ { Metric: labels.FromStrings("__name__", "metric1"), - Floats: []FPoint{ + Floats: []promql.FPoint{ {0, 1}, {10000, 2}, {20000, 3}, {30000, 4}, {40000, 5}, }, }, @@ -66,10 +67,10 @@ func TestLazyLoader_WithSamplesTill(t *testing.T) { }, { ts: time.Unix(60, 0), - series: []Series{ + series: []promql.Series{ { Metric: labels.FromStrings("__name__", "metric1"), - Floats: []FPoint{ + Floats: []promql.FPoint{ {0, 1}, {10000, 2}, {20000, 3}, {30000, 4}, {40000, 5}, {50000, 6}, {60000, 7}, }, }, @@ -86,16 +87,16 @@ func TestLazyLoader_WithSamplesTill(t *testing.T) { testCases: []testCase{ { // Adds all samples of metric1. ts: time.Unix(70, 0), - series: []Series{ + series: []promql.Series{ { Metric: labels.FromStrings("__name__", "metric1"), - Floats: []FPoint{ + Floats: []promql.FPoint{ {0, 1}, {10000, 1}, {20000, 1}, {30000, 1}, {40000, 1}, {50000, 1}, }, }, { Metric: labels.FromStrings("__name__", "metric2"), - Floats: []FPoint{ + Floats: []promql.FPoint{ {0, 1}, {10000, 2}, {20000, 3}, {30000, 4}, {40000, 5}, {50000, 6}, {60000, 7}, {70000, 8}, }, }, @@ -140,13 +141,13 @@ func TestLazyLoader_WithSamplesTill(t *testing.T) { require.False(t, ss.Next(), "Expecting only 1 series") // Convert `storage.Series` to `promql.Series`. - got := Series{ + got := promql.Series{ Metric: storageSeries.Labels(), } it := storageSeries.Iterator(nil) for it.Next() == chunkenc.ValFloat { t, v := it.At() - got.Floats = append(got.Floats, FPoint{T: t, F: v}) + got.Floats = append(got.Floats, promql.FPoint{T: t, F: v}) } require.NoError(t, it.Err()) @@ -450,7 +451,7 @@ eval range from 0 to 5m step 5m testmetric for name, testCase := range testCases { t.Run(name, func(t *testing.T) { - err := runTest(t, testCase.input, newTestEngine()) + err := runTest(t, testCase.input, NewTestEngine()) if testCase.expectedError == "" { require.NoError(t, err) @@ -463,42 +464,42 @@ eval range from 0 to 5m step 5m testmetric func TestAssertMatrixSorted(t *testing.T) { testCases := map[string]struct { - matrix Matrix + matrix promql.Matrix expectedError string }{ "empty matrix": { - matrix: Matrix{}, + matrix: promql.Matrix{}, }, "matrix with one series": { - matrix: Matrix{ - Series{Metric: labels.FromStrings("the_label", "value_1")}, + matrix: promql.Matrix{ + promql.Series{Metric: labels.FromStrings("the_label", "value_1")}, }, }, "matrix with two series, series in sorted order": { - matrix: Matrix{ - Series{Metric: labels.FromStrings("the_label", "value_1")}, - Series{Metric: labels.FromStrings("the_label", "value_2")}, + matrix: promql.Matrix{ + promql.Series{Metric: labels.FromStrings("the_label", "value_1")}, + promql.Series{Metric: labels.FromStrings("the_label", "value_2")}, }, }, "matrix with two series, series in reverse order": { - matrix: Matrix{ - Series{Metric: labels.FromStrings("the_label", "value_2")}, - Series{Metric: labels.FromStrings("the_label", "value_1")}, + matrix: promql.Matrix{ + promql.Series{Metric: labels.FromStrings("the_label", "value_2")}, + promql.Series{Metric: labels.FromStrings("the_label", "value_1")}, }, 
expectedError: `matrix results should always be sorted by labels, but matrix is not sorted: series at index 1 with labels {the_label="value_1"} sorts before series at index 0 with labels {the_label="value_2"}`, }, "matrix with three series, series in sorted order": { - matrix: Matrix{ - Series{Metric: labels.FromStrings("the_label", "value_1")}, - Series{Metric: labels.FromStrings("the_label", "value_2")}, - Series{Metric: labels.FromStrings("the_label", "value_3")}, + matrix: promql.Matrix{ + promql.Series{Metric: labels.FromStrings("the_label", "value_1")}, + promql.Series{Metric: labels.FromStrings("the_label", "value_2")}, + promql.Series{Metric: labels.FromStrings("the_label", "value_3")}, }, }, "matrix with three series, series not in sorted order": { - matrix: Matrix{ - Series{Metric: labels.FromStrings("the_label", "value_1")}, - Series{Metric: labels.FromStrings("the_label", "value_3")}, - Series{Metric: labels.FromStrings("the_label", "value_2")}, + matrix: promql.Matrix{ + promql.Series{Metric: labels.FromStrings("the_label", "value_1")}, + promql.Series{Metric: labels.FromStrings("the_label", "value_3")}, + promql.Series{Metric: labels.FromStrings("the_label", "value_2")}, }, expectedError: `matrix results should always be sorted by labels, but matrix is not sorted: series at index 2 with labels {the_label="value_2"} sorts before series at index 1 with labels {the_label="value_3"}`, }, From 9aa321d67236ab43c1c9ca69577fd361b87e2405 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 29 Apr 2024 13:40:14 +0100 Subject: [PATCH 062/112] test: make field initializers explicit Lint started complaining after I moved the file. Signed-off-by: Bryan Boreham --- promql/promqltest/test_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/promql/promqltest/test_test.go b/promql/promqltest/test_test.go index bad3e2f3b..d28f556f2 100644 --- a/promql/promqltest/test_test.go +++ b/promql/promqltest/test_test.go @@ -49,7 +49,7 @@ func TestLazyLoader_WithSamplesTill(t *testing.T) { { Metric: labels.FromStrings("__name__", "metric1"), Floats: []promql.FPoint{ - {0, 1}, {10000, 2}, {20000, 3}, {30000, 4}, {40000, 5}, + {T: 0, F: 1}, {T: 10000, F: 2}, {T: 20000, F: 3}, {T: 30000, F: 4}, {T: 40000, F: 5}, }, }, }, @@ -60,7 +60,7 @@ func TestLazyLoader_WithSamplesTill(t *testing.T) { { Metric: labels.FromStrings("__name__", "metric1"), Floats: []promql.FPoint{ - {0, 1}, {10000, 2}, {20000, 3}, {30000, 4}, {40000, 5}, + {T: 0, F: 1}, {T: 10000, F: 2}, {T: 20000, F: 3}, {T: 30000, F: 4}, {T: 40000, F: 5}, }, }, }, @@ -71,7 +71,7 @@ func TestLazyLoader_WithSamplesTill(t *testing.T) { { Metric: labels.FromStrings("__name__", "metric1"), Floats: []promql.FPoint{ - {0, 1}, {10000, 2}, {20000, 3}, {30000, 4}, {40000, 5}, {50000, 6}, {60000, 7}, + {T: 0, F: 1}, {T: 10000, F: 2}, {T: 20000, F: 3}, {T: 30000, F: 4}, {T: 40000, F: 5}, {T: 50000, F: 6}, {T: 60000, F: 7}, }, }, }, @@ -91,13 +91,13 @@ func TestLazyLoader_WithSamplesTill(t *testing.T) { { Metric: labels.FromStrings("__name__", "metric1"), Floats: []promql.FPoint{ - {0, 1}, {10000, 1}, {20000, 1}, {30000, 1}, {40000, 1}, {50000, 1}, + {T: 0, F: 1}, {T: 10000, F: 1}, {T: 20000, F: 1}, {T: 30000, F: 1}, {T: 40000, F: 1}, {T: 50000, F: 1}, }, }, { Metric: labels.FromStrings("__name__", "metric2"), Floats: []promql.FPoint{ - {0, 1}, {10000, 2}, {20000, 3}, {30000, 4}, {40000, 5}, {50000, 6}, {60000, 7}, {70000, 8}, + {T: 0, F: 1}, {T: 10000, F: 2}, {T: 20000, F: 3}, {T: 30000, F: 4}, {T: 40000, F: 5}, {T: 50000, F: 6}, 
{T: 60000, F: 7}, {T: 70000, F: 8}, }, }, }, From b3c15d2246e9652f08c2b6030b82f8aaa943849e Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 29 Apr 2024 10:47:45 +0100 Subject: [PATCH 063/112] test: clean up promqltest package references So it nearly compiles. Signed-off-by: Bryan Boreham --- promql/promqltest/test.go | 81 +++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 2cfcfce6f..eab26ad57 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package promql +package promqltest import ( "context" @@ -32,6 +32,7 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/storage" @@ -71,7 +72,7 @@ func LoadedStorage(t testutil.T, input string) *teststorage.TestStorage { } // RunBuiltinTests runs an acceptance test suite against the provided engine. -func RunBuiltinTests(t *testing.T, engine QueryEngine) { +func RunBuiltinTests(t *testing.T, engine promql.QueryEngine) { t.Cleanup(func() { parser.EnableExperimentalFunctions = false }) parser.EnableExperimentalFunctions = true @@ -88,11 +89,11 @@ func RunBuiltinTests(t *testing.T, engine QueryEngine) { } // RunTest parses and runs the test against the provided engine. -func RunTest(t testutil.T, input string, engine QueryEngine) { +func RunTest(t testutil.T, input string, engine promql.QueryEngine) { require.NoError(t, runTest(t, input, engine)) } -func runTest(t testutil.T, input string, engine QueryEngine) error { +func runTest(t testutil.T, input string, engine promql.QueryEngine) error { test, err := newTest(t, input) // Why do this before checking err? 
newTest() can create the test storage and then return an error, @@ -366,7 +367,7 @@ func (*evalCmd) testCmd() {} type loadCmd struct { gap time.Duration metrics map[uint64]labels.Labels - defs map[uint64][]Sample + defs map[uint64][]promql.Sample exemplars map[uint64][]exemplar.Exemplar } @@ -374,7 +375,7 @@ func newLoadCmd(gap time.Duration) *loadCmd { return &loadCmd{ gap: gap, metrics: map[uint64]labels.Labels{}, - defs: map[uint64][]Sample{}, + defs: map[uint64][]promql.Sample{}, exemplars: map[uint64][]exemplar.Exemplar{}, } } @@ -387,11 +388,11 @@ func (cmd loadCmd) String() string { func (cmd *loadCmd) set(m labels.Labels, vals ...parser.SequenceValue) { h := m.Hash() - samples := make([]Sample, 0, len(vals)) + samples := make([]promql.Sample, 0, len(vals)) ts := testStartTime for _, v := range vals { if !v.Omitted { - samples = append(samples, Sample{ + samples = append(samples, promql.Sample{ T: ts.UnixNano() / int64(time.Millisecond/time.Nanosecond), F: v.Value, H: v.Histogram, @@ -417,7 +418,7 @@ func (cmd *loadCmd) append(a storage.Appender) error { return nil } -func appendSample(a storage.Appender, s Sample, m labels.Labels) error { +func appendSample(a storage.Appender, s promql.Sample, m labels.Labels) error { if s.H != nil { if _, err := a.AppendHistogram(0, m, s.T, nil, s.H); err != nil { return err @@ -501,7 +502,7 @@ func (ev *evalCmd) expectMetric(pos int, m labels.Labels, vals ...parser.Sequenc // compareResult compares the result value with the defined expectation. func (ev *evalCmd) compareResult(result parser.Value) error { switch val := result.(type) { - case Matrix: + case promql.Matrix: if ev.ordered { return fmt.Errorf("expected ordered result, but query returned a matrix") } @@ -519,8 +520,8 @@ func (ev *evalCmd) compareResult(result parser.Value) error { seen[hash] = true exp := ev.expected[hash] - var expectedFloats []FPoint - var expectedHistograms []HPoint + var expectedFloats []promql.FPoint + var expectedHistograms []promql.HPoint for i, e := range exp.vals { ts := ev.start.Add(time.Duration(i) * ev.step) @@ -532,9 +533,9 @@ func (ev *evalCmd) compareResult(result parser.Value) error { t := ts.UnixNano() / int64(time.Millisecond/time.Nanosecond) if e.Histogram != nil { - expectedHistograms = append(expectedHistograms, HPoint{T: t, H: e.Histogram}) + expectedHistograms = append(expectedHistograms, promql.HPoint{T: t, H: e.Histogram}) } else if !e.Omitted { - expectedFloats = append(expectedFloats, FPoint{T: t, F: e.Value}) + expectedFloats = append(expectedFloats, promql.FPoint{T: t, F: e.Value}) } } @@ -573,7 +574,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { } } - case Vector: + case promql.Vector: seen := map[uint64]bool{} for pos, v := range val { fp := v.Metric.Hash() @@ -611,7 +612,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { } } - case Scalar: + case promql.Scalar: if len(ev.expected) != 1 { return fmt.Errorf("expected vector result, but got scalar %s", val.String()) } @@ -629,7 +630,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { return nil } -func formatSeriesResult(s Series) string { +func formatSeriesResult(s promql.Series) string { floatPlural := "s" histogramPlural := "s" @@ -698,7 +699,7 @@ func atModifierTestCases(exprStr string, evalTime time.Time) ([]atModifierTestCa } case *parser.Call: - _, ok := AtModifierUnsafeFunctions[n.Func.Name] + _, ok := promql.AtModifierUnsafeFunctions[n.Func.Name] containsNonStepInvariant = containsNonStepInvariant || ok } return nil @@ -738,7 
+739,7 @@ func hasAtModifier(path []parser.Node) bool { } // exec processes a single step of the test. -func (t *test) exec(tc testCommand, engine QueryEngine) error { +func (t *test) exec(tc testCommand, engine promql.QueryEngine) error { switch cmd := tc.(type) { case *clearCmd: t.clear() @@ -763,7 +764,7 @@ func (t *test) exec(tc testCommand, engine QueryEngine) error { return nil } -func (t *test) execEval(cmd *evalCmd, engine QueryEngine) error { +func (t *test) execEval(cmd *evalCmd, engine promql.QueryEngine) error { if cmd.isRange { return t.execRangeEval(cmd, engine) } @@ -771,7 +772,7 @@ func (t *test) execEval(cmd *evalCmd, engine QueryEngine) error { return t.execInstantEval(cmd, engine) } -func (t *test) execRangeEval(cmd *evalCmd, engine QueryEngine) error { +func (t *test) execRangeEval(cmd *evalCmd, engine promql.QueryEngine) error { q, err := engine.NewRangeQuery(t.context, t.storage, nil, cmd.expr, cmd.start, cmd.end, cmd.step) if err != nil { return fmt.Errorf("error creating range query for %q (line %d): %w", cmd.expr, cmd.line, err) @@ -796,7 +797,7 @@ func (t *test) execRangeEval(cmd *evalCmd, engine QueryEngine) error { return nil } -func (t *test) execInstantEval(cmd *evalCmd, engine QueryEngine) error { +func (t *test) execInstantEval(cmd *evalCmd, engine promql.QueryEngine) error { queries, err := atModifierTestCases(cmd.expr, cmd.start) if err != nil { return err @@ -838,29 +839,29 @@ func (t *test) execInstantEval(cmd *evalCmd, engine QueryEngine) error { // Range queries are always sorted by labels, so skip this test case that expects results in a particular order. continue } - mat := rangeRes.Value.(Matrix) + mat := rangeRes.Value.(promql.Matrix) if err := assertMatrixSorted(mat); err != nil { return err } - vec := make(Vector, 0, len(mat)) + vec := make(promql.Vector, 0, len(mat)) for _, series := range mat { // We expect either Floats or Histograms. 
for _, point := range series.Floats { if point.T == timeMilliseconds(iq.evalTime) { - vec = append(vec, Sample{Metric: series.Metric, T: point.T, F: point.F}) + vec = append(vec, promql.Sample{Metric: series.Metric, T: point.T, F: point.F}) break } } for _, point := range series.Histograms { if point.T == timeMilliseconds(iq.evalTime) { - vec = append(vec, Sample{Metric: series.Metric, T: point.T, H: point.H}) + vec = append(vec, promql.Sample{Metric: series.Metric, T: point.T, H: point.H}) break } } } - if _, ok := res.Value.(Scalar); ok { - err = cmd.compareResult(Scalar{V: vec[0].F}) + if _, ok := res.Value.(promql.Scalar); ok { + err = cmd.compareResult(promql.Scalar{V: vec[0].F}) } else { err = cmd.compareResult(vec) } @@ -872,7 +873,7 @@ func (t *test) execInstantEval(cmd *evalCmd, engine promql.QueryEngine) error { return nil } -func assertMatrixSorted(m Matrix) error { +func assertMatrixSorted(m promql.Matrix) error { if len(m) <= 1 { return nil } @@ -922,7 +923,7 @@ type LazyLoader struct { storage storage.Storage SubqueryInterval time.Duration - queryEngine *Engine + queryEngine *promql.Engine context context.Context cancelCtx context.CancelFunc @@ -989,7 +990,7 @@ func (ll *LazyLoader) clear() error { return err } - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10000, @@ -999,7 +1000,7 @@ func (ll *LazyLoader) clear() error { EnableNegativeOffset: ll.opts.EnableNegativeOffset, } - ll.queryEngine = NewEngine(opts) + ll.queryEngine = promql.NewEngine(opts) ll.context, ll.cancelCtx = context.WithCancel(context.Background()) return nil } @@ -1033,7 +1034,7 @@ func (ll *LazyLoader) WithSamplesTill(ts time.Time, fn func(error)) { } // QueryEngine returns the LazyLoader's query engine. -func (ll *LazyLoader) QueryEngine() *Engine { +func (ll *LazyLoader) QueryEngine() *promql.Engine { return ll.queryEngine } @@ -1059,3 +1060,17 @@ func (ll *LazyLoader) Close() error { ll.cancelCtx() return ll.storage.Close() } + +func makeInt64Pointer(val int64) *int64 { + valp := new(int64) + *valp = val + return valp +} + +func timeMilliseconds(t time.Time) int64 { + return t.UnixNano() / int64(time.Millisecond/time.Nanosecond) +} + +func durationMilliseconds(d time.Duration) int64 { + return int64(d / (time.Millisecond / time.Nanosecond)) +} From dabd789fd5d3a027ca070e175bd94e295edfe4d2 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Wed, 8 May 2024 16:24:58 +0200 Subject: [PATCH 064/112] Quote label name in matchers when needed When the label name of a matcher contains non-standard characters, like a dot, or starts with a digit, it should be quoted. If it's not quoted, then the output of `VectorSelector.String()` isn't valid PromQL.
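For illustration, a minimal sketch using this package's API (the printed forms match the printer_test.go cases added in this patch):

    package main

    import (
        "fmt"

        "github.com/prometheus/prometheus/model/labels"
    )

    func main() {
        m := labels.MustNewMatcher(labels.MatchEqual, "a.b", "c")
        fmt.Println(m.String()) // now prints "a.b"="c"; previously a.b="c", which the PromQL parser rejects
    }

Names that need no quoting, such as _0, keep the old unquoted form.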
Signed-off-by: Oleg Zaytsev --- model/labels/matcher.go | 15 +++- model/labels/matcher_test.go | 126 ++++++++++++++++++++++++++++++++++ promql/parser/printer_test.go | 10 +++ 3 files changed, 150 insertions(+), 1 deletion(-) diff --git a/model/labels/matcher.go b/model/labels/matcher.go index 1282f80d6..291eac1c7 100644 --- a/model/labels/matcher.go +++ b/model/labels/matcher.go @@ -78,7 +78,20 @@ func MustNewMatcher(mt MatchType, name, val string) *Matcher { } func (m *Matcher) String() string { - return fmt.Sprintf("%s%s%q", m.Name, m.Type, m.Value) + if !m.shouldQuoteName() { + return fmt.Sprintf("%s%s%q", m.Name, m.Type, m.Value) + } + return fmt.Sprintf("%q%s%q", m.Name, m.Type, m.Value) +} + +func (m *Matcher) shouldQuoteName() bool { + for i, c := range m.Name { + if c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (i > 0 && c >= '0' && c <= '9') { + continue + } + return true + } + return false } // Matches returns whether the matcher matches the given string value. diff --git a/model/labels/matcher_test.go b/model/labels/matcher_test.go index c23deafe6..ff39d40d0 100644 --- a/model/labels/matcher_test.go +++ b/model/labels/matcher_test.go @@ -15,6 +15,7 @@ package labels import ( "fmt" + "math/rand" "testing" "github.com/stretchr/testify/require" @@ -225,3 +226,128 @@ func BenchmarkNewMatcher(b *testing.B) { } }) } + +func BenchmarkMatcher_String(b *testing.B) { + type benchCase struct { + name string + matchers []*Matcher + } + cases := []benchCase{ + { + name: "short name equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, "foo", "bar"), + MustNewMatcher(MatchEqual, "bar", "baz"), + MustNewMatcher(MatchEqual, "abc", "def"), + MustNewMatcher(MatchEqual, "ghi", "klm"), + MustNewMatcher(MatchEqual, "nop", "qrs"), + }, + }, + { + name: "short quoted name not equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, "f.o", "bar"), + MustNewMatcher(MatchEqual, "b.r", "baz"), + MustNewMatcher(MatchEqual, "a.c", "def"), + MustNewMatcher(MatchEqual, "g.i", "klm"), + MustNewMatcher(MatchEqual, "n.p", "qrs"), + }, + }, + { + name: "short quoted name with quotes not equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, `"foo"`, "bar"), + MustNewMatcher(MatchEqual, `"foo"`, "baz"), + MustNewMatcher(MatchEqual, `"foo"`, "def"), + MustNewMatcher(MatchEqual, `"foo"`, "klm"), + MustNewMatcher(MatchEqual, `"foo"`, "qrs"), + }, + }, + { + name: "short name value with quotes equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, "foo", `"bar"`), + MustNewMatcher(MatchEqual, "bar", `"baz"`), + MustNewMatcher(MatchEqual, "abc", `"def"`), + MustNewMatcher(MatchEqual, "ghi", `"klm"`), + MustNewMatcher(MatchEqual, "nop", `"qrs"`), + }, + }, + { + name: "short name and long value regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "foo", "five_six_seven_eight_nine_ten_one_two_three_four"), + MustNewMatcher(MatchRegexp, "bar", "one_two_three_four_five_six_seven_eight_nine_ten"), + MustNewMatcher(MatchRegexp, "abc", "two_three_four_five_six_seven_eight_nine_ten_one"), + MustNewMatcher(MatchRegexp, "ghi", "three_four_five_six_seven_eight_nine_ten_one_two"), + MustNewMatcher(MatchRegexp, "nop", "four_five_six_seven_eight_nine_ten_one_two_three"), + }, + }, + { + name: "short name and long value with quotes equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, "foo", `five_six_seven_eight_nine_ten_"one"_two_three_four`), + MustNewMatcher(MatchEqual, "bar", `one_two_three_four_five_six_"seven"_eight_nine_ten`), + MustNewMatcher(MatchEqual, "abc", 
`two_three_four_five_six_seven_"eight"_nine_ten_one`), + MustNewMatcher(MatchEqual, "ghi", `three_four_five_six_seven_eight_"nine"_ten_one_two`), + MustNewMatcher(MatchEqual, "nop", `four_five_six_seven_eight_nine_"ten"_one_two_three`), + }, + }, + { + name: "long name regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "one_two_three_four_five_six_seven_eight_nine_ten", "val"), + MustNewMatcher(MatchRegexp, "two_three_four_five_six_seven_eight_nine_ten_one", "val"), + MustNewMatcher(MatchRegexp, "three_four_five_six_seven_eight_nine_ten_one_two", "val"), + MustNewMatcher(MatchRegexp, "four_five_six_seven_eight_nine_ten_one_two_three", "val"), + MustNewMatcher(MatchRegexp, "five_six_seven_eight_nine_ten_one_two_three_four", "val"), + }, + }, + { + name: "long quoted name regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "one.two.three.four.five.six.seven.eight.nine.ten", "val"), + MustNewMatcher(MatchRegexp, "two.three.four.five.six.seven.eight.nine.ten.one", "val"), + MustNewMatcher(MatchRegexp, "three.four.five.six.seven.eight.nine.ten.one.two", "val"), + MustNewMatcher(MatchRegexp, "four.five.six.seven.eight.nine.ten.one.two.three", "val"), + MustNewMatcher(MatchRegexp, "five.six.seven.eight.nine.ten.one.two.three.four", "val"), + }, + }, + { + name: "long name and long value regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "one_two_three_four_five_six_seven_eight_nine_ten", "five_six_seven_eight_nine_ten_one_two_three_four"), + MustNewMatcher(MatchRegexp, "two_three_four_five_six_seven_eight_nine_ten_one", "one_two_three_four_five_six_seven_eight_nine_ten"), + MustNewMatcher(MatchRegexp, "three_four_five_six_seven_eight_nine_ten_one_two", "two_three_four_five_six_seven_eight_nine_ten_one"), + MustNewMatcher(MatchRegexp, "four_five_six_seven_eight_nine_ten_one_two_three", "three_four_five_six_seven_eight_nine_ten_one_two"), + MustNewMatcher(MatchRegexp, "five_six_seven_eight_nine_ten_one_two_three_four", "four_five_six_seven_eight_nine_ten_one_two_three"), + }, + }, + { + name: "long quoted name and long value regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "one.two.three.four.five.six.seven.eight.nine.ten", "five.six.seven.eight.nine.ten.one.two.three.four"), + MustNewMatcher(MatchRegexp, "two.three.four.five.six.seven.eight.nine.ten.one", "one.two.three.four.five.six.seven.eight.nine.ten"), + MustNewMatcher(MatchRegexp, "three.four.five.six.seven.eight.nine.ten.one.two", "two.three.four.five.six.seven.eight.nine.ten.one"), + MustNewMatcher(MatchRegexp, "four.five.six.seven.eight.nine.ten.one.two.three", "three.four.five.six.seven.eight.nine.ten.one.two"), + MustNewMatcher(MatchRegexp, "five.six.seven.eight.nine.ten.one.two.three.four", "four.five.six.seven.eight.nine.ten.one.two.three"), + }, + }, + } + + var mixed []*Matcher + for _, bc := range cases { + mixed = append(mixed, bc.matchers...) 
+ } + rand.Shuffle(len(mixed), func(i, j int) { mixed[i], mixed[j] = mixed[j], mixed[i] }) + cases = append(cases, benchCase{name: "mixed", matchers: mixed}) + + for _, bc := range cases { + b.Run(bc.name, func(b *testing.B) { + for i := 0; i <= b.N; i++ { + m := bc.matchers[i%len(bc.matchers)] + _ = m.String() + } + }) + } +} diff --git a/promql/parser/printer_test.go b/promql/parser/printer_test.go index 554fb6962..f224d841d 100644 --- a/promql/parser/printer_test.go +++ b/promql/parser/printer_test.go @@ -138,6 +138,16 @@ func TestExprString(t *testing.T) { { in: `{__name__="",a="x"}`, }, + { + in: `{"a.b"="c"}`, + }, + { + in: `{"0"="1"}`, + }, + { + in: `{"_0"="1"}`, + out: `{_0="1"}`, + }, } for _, test := range inputs { From 6ebda5a7bc8c1f5611e41cc63a769bd9afc14592 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Wed, 8 May 2024 17:05:27 +0200 Subject: [PATCH 065/112] Optimize Matcher.String() Signed-off-by: Oleg Zaytsev --- model/labels/matcher.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/model/labels/matcher.go b/model/labels/matcher.go index 291eac1c7..ce9e42471 100644 --- a/model/labels/matcher.go +++ b/model/labels/matcher.go @@ -14,7 +14,8 @@ package labels import ( - "fmt" + "strconv" + "unsafe" ) // MatchType is an enum for label matching types. @@ -78,10 +79,20 @@ func MustNewMatcher(mt MatchType, name, val string) *Matcher { } func (m *Matcher) String() string { - if !m.shouldQuoteName() { - return fmt.Sprintf("%s%s%q", m.Name, m.Type, m.Value) + const quote = 1 + const matcher = 2 + // As we're not on go1.22 yet and we don't have the new fancy AvailableBuffer method on strings.Builder, + // we'll use a plain byte slice and then do the unsafe conversion to string just like strings.Builder does. + // We pre-allocate pessimistically for quoting the label name, and optimistically for not having to escape any quotes. + b := make([]byte, 0, quote+len(m.Name)+quote+matcher+quote+len(m.Value)+quote) + if m.shouldQuoteName() { + b = strconv.AppendQuote(b, m.Name) + } else { + b = append(b, m.Name...) } - return fmt.Sprintf("%q%s%q", m.Name, m.Type, m.Value) + b = append(b, m.Type.String()...) + b = strconv.AppendQuote(b, m.Value) + return *((*string)(unsafe.Pointer(&b))) } func (m *Matcher) shouldQuoteName() bool { From babfcfdd910ef2d3e829ae05bac766c1c4e3952b Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 29 Apr 2024 13:13:23 +0100 Subject: [PATCH 066/112] refactor: Move NewTestEngine into promqltest And export `DefaultMaxSamplesPerQuery` so callers can replicate previous behaviour. 
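Call sites migrate along these lines (sketch; the promql_test.go hunk below does exactly this):

    engine := promqltest.NewTestEngine(false, 0, promqltest.DefaultMaxSamplesPerQuery)

that is, per-step stats disabled, a zero lookback delta (letting the engine pick its default), and the exported default sample limit.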
Signed-off-by: Bryan Boreham --- promql/engine_test.go | 2 +- promql/promql_test.go | 12 ++---------- promql/promqltest/test.go | 17 ++++++++++++++++- promql/promqltest/test_test.go | 2 +- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index 36f36807a..1c2722a1d 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -4999,7 +4999,7 @@ metric 0 1 2 if c.engineLookback != 0 { engine.lookbackDelta = c.engineLookback } - opts := NewPrometheusQueryOpts(false, c.queryLookback) + opts := promql.NewPrometheusQueryOpts(false, c.queryLookback) qry, err := engine.NewInstantQuery(context.Background(), storage, opts, query, c.ts) require.NoError(t, err) diff --git a/promql/promql_test.go b/promql/promql_test.go index 05821b1c1..87ec2cd4e 100644 --- a/promql/promql_test.go +++ b/promql/promql_test.go @@ -22,20 +22,12 @@ import ( "github.com/stretchr/testify/require" "golang.org/x/sync/errgroup" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/util/teststorage" ) func newTestEngine() *Engine { - return NewEngine(EngineOpts{ - Logger: nil, - Reg: nil, - MaxSamples: 10000, - Timeout: 100 * time.Second, - NoStepSubqueryIntervalFn: func(int64) int64 { return durationMilliseconds(1 * time.Minute) }, - EnableAtModifier: true, - EnableNegativeOffset: true, - EnablePerStepStats: true, - }) + return promqltest.NewTestEngine(false, 0, promqltest.DefaultMaxSamplesPerQuery) } func TestEvaluations(t *testing.T) { diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index eab26ad57..1b2ce78af 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -49,7 +49,8 @@ var ( ) const ( - defaultEpsilon = 0.000001 // Relative error allowed for sample values. + defaultEpsilon = 0.000001 // Relative error allowed for sample values. + DefaultMaxSamplesPerQuery = 10000 ) var testStartTime = time.Unix(0, 0).UTC() @@ -71,6 +72,20 @@ func LoadedStorage(t testutil.T, input string) *teststorage.TestStorage { return test.storage } +func NewTestEngine(enablePerStepStats bool, lookbackDelta time.Duration, maxSamples int) *promql.Engine { + return promql.NewEngine(promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: DefaultMaxSamplesPerQuery, + Timeout: 100 * time.Second, + NoStepSubqueryIntervalFn: func(int64) int64 { return durationMilliseconds(1 * time.Minute) }, + EnableAtModifier: true, + EnableNegativeOffset: true, + EnablePerStepStats: enablePerStepStats, + LookbackDelta: lookbackDelta, + }) +} + // RunBuiltinTests runs an acceptance test suite against the provided engine. func RunBuiltinTests(t *testing.T, engine promql.QueryEngine) { t.Cleanup(func() { parser.EnableExperimentalFunctions = false }) diff --git a/promql/promqltest/test_test.go b/promql/promqltest/test_test.go index d28f556f2..f6fe38707 100644 --- a/promql/promqltest/test_test.go +++ b/promql/promqltest/test_test.go @@ -451,7 +451,7 @@ eval range from 0 to 5m step 5m testmetric for name, testCase := range testCases { t.Run(name, func(t *testing.T) { - err := runTest(t, testCase.input, NewTestEngine()) + err := runTest(t, testCase.input, NewTestEngine(false, 0, DefaultMaxSamplesPerQuery)) if testCase.expectedError == "" { require.NoError(t, err) From e7c77f7b4097eb4ddcde99aed2a9296870b79052 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 29 Apr 2024 12:38:27 +0100 Subject: [PATCH 067/112] promql: export NewTestQuery So that tests can call it from another package. 
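A test in another package can now write, for instance (hypothetical snippet built only on the exported signature; engine is a *promql.Engine):

    qry := engine.NewTestQuery(func(ctx context.Context) error {
        return nil // behaviour injected by the test
    })

Per the parser.TestStmt wiring in the diff below, executing the returned Query runs the injected function instead of a parsed statement.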
Signed-off-by: Bryan Boreham --- promql/engine.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/promql/engine.go b/promql/engine.go index b8a8ea095..08c787d5c 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -573,7 +573,8 @@ func (ng *Engine) validateOpts(expr parser.Expr) error { return validationErr } -func (ng *Engine) newTestQuery(f func(context.Context) error) Query { +// NewTestQuery: inject special behaviour into Query for testing. +func (ng *Engine) NewTestQuery(f func(context.Context) error) Query { qry := &query{ q: "test statement", stmt: parser.TestStmt(f), From 8fd96241ab04d829e198ad2f6f67e33f3bf5b564 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 29 Apr 2024 10:48:24 +0100 Subject: [PATCH 068/112] test: add promqltest package references To packages outside of promql. Signed-off-by: Bryan Boreham --- cmd/promtool/main.go | 4 ++-- cmd/promtool/tsdb_test.go | 6 +++--- cmd/promtool/unittest.go | 13 +++++++------ cmd/promtool/unittest_test.go | 12 ++++++------ rules/alerting_test.go | 17 +++++++++-------- rules/manager_test.go | 9 +++++---- rules/recording_test.go | 5 +++-- storage/remote/read_handler_test.go | 8 ++++---- web/api/v1/api_test.go | 11 ++++++----- web/federate_test.go | 3 ++- 10 files changed, 47 insertions(+), 41 deletions(-) diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index a62ae4fbf..c0484adcc 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -56,8 +56,8 @@ import ( "github.com/prometheus/prometheus/model/rulefmt" "github.com/prometheus/prometheus/notifier" _ "github.com/prometheus/prometheus/plugins" // Register plugins. - "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/util/documentcli" ) @@ -377,7 +377,7 @@ func main() { case testRulesCmd.FullCommand(): os.Exit(RulesUnitTest( - promql.LazyLoaderOpts{ + promqltest.LazyLoaderOpts{ EnableAtModifier: true, EnableNegativeOffset: true, }, diff --git a/cmd/promtool/tsdb_test.go b/cmd/promtool/tsdb_test.go index 36a65d73e..70e887765 100644 --- a/cmd/promtool/tsdb_test.go +++ b/cmd/promtool/tsdb_test.go @@ -26,7 +26,7 @@ import ( "github.com/stretchr/testify/require" - "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/tsdb" ) @@ -88,7 +88,7 @@ func normalizeNewLine(b []byte) []byte { } func TestTSDBDump(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m metric{foo="bar", baz="abc"} 1 2 3 4 5 heavy_metric{foo="bar"} 5 4 3 2 1 @@ -158,7 +158,7 @@ func TestTSDBDump(t *testing.T) { } func TestTSDBDumpOpenMetrics(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m my_counter{foo="bar", baz="abc"} 1 2 3 4 5 my_gauge{bar="foo", abc="baz"} 9 8 0 4 7 diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index 6d6683a93..27d5dd98b 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -36,13 +36,14 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/storage" ) // RulesUnitTest does unit testing of rules based on the unit testing files provided. 
// More info about the file format can be found in the docs. -func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { +func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { failed := false var run *regexp.Regexp @@ -69,7 +70,7 @@ func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, diffFla return successExitCode } -func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool) []error { +func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool) []error { fmt.Println("Unit Testing: ", filename) b, err := os.ReadFile(filename) @@ -175,9 +176,9 @@ type testGroup struct { } // test performs the unit tests. -func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promql.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) (outErr []error) { +func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) (outErr []error) { // Setup testing suite. - suite, err := promql.NewLazyLoader(tg.seriesLoadingString(), queryOpts) + suite, err := promqltest.NewLazyLoader(tg.seriesLoadingString(), queryOpts) if err != nil { return []error{err} } @@ -413,7 +414,7 @@ Outer: gotSamples = append(gotSamples, parsedSample{ Labels: s.Metric.Copy(), Value: s.F, - Histogram: promql.HistogramTestExpression(s.H), + Histogram: promqltest.HistogramTestExpression(s.H), }) } @@ -443,7 +444,7 @@ Outer: expSamples = append(expSamples, parsedSample{ Labels: lb, Value: s.Value, - Histogram: promql.HistogramTestExpression(hist), + Histogram: promqltest.HistogramTestExpression(hist), }) } diff --git a/cmd/promtool/unittest_test.go b/cmd/promtool/unittest_test.go index 971ddb40c..2dbd5a4e5 100644 --- a/cmd/promtool/unittest_test.go +++ b/cmd/promtool/unittest_test.go @@ -18,7 +18,7 @@ import ( "github.com/stretchr/testify/require" - "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/promqltest" ) func TestRulesUnitTest(t *testing.T) { @@ -28,7 +28,7 @@ func TestRulesUnitTest(t *testing.T) { tests := []struct { name string args args - queryOpts promql.LazyLoaderOpts + queryOpts promqltest.LazyLoaderOpts want int }{ { @@ -92,7 +92,7 @@ func TestRulesUnitTest(t *testing.T) { args: args{ files: []string{"./testdata/at-modifier-test.yml"}, }, - queryOpts: promql.LazyLoaderOpts{ + queryOpts: promqltest.LazyLoaderOpts{ EnableAtModifier: true, }, want: 0, @@ -109,7 +109,7 @@ func TestRulesUnitTest(t *testing.T) { args: args{ files: []string{"./testdata/negative-offset-test.yml"}, }, - queryOpts: promql.LazyLoaderOpts{ + queryOpts: promqltest.LazyLoaderOpts{ EnableNegativeOffset: true, }, want: 0, @@ -119,7 +119,7 @@ func TestRulesUnitTest(t *testing.T) { args: args{ files: []string{"./testdata/no-test-group-interval.yml"}, }, - queryOpts: promql.LazyLoaderOpts{ + queryOpts: promqltest.LazyLoaderOpts{ EnableNegativeOffset: true, }, want: 0, @@ -142,7 +142,7 @@ func TestRulesUnitTestRun(t *testing.T) { tests := []struct { name string args args - queryOpts promql.LazyLoaderOpts + queryOpts promqltest.LazyLoaderOpts want int }{ { diff --git a/rules/alerting_test.go b/rules/alerting_test.go index 5fae3edd1..a9315b47e 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -30,6 +30,7 @@ import ( "github.com/prometheus/prometheus/notifier" 
"github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" @@ -148,7 +149,7 @@ func TestAlertingRuleTemplateWithHistogram(t *testing.T) { } func TestAlertingRuleLabelsUpdate(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 stale `) @@ -252,7 +253,7 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) { } func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 `) @@ -345,7 +346,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { } func TestAlertingRuleExternalURLInTemplate(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 `) @@ -438,7 +439,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { } func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 `) @@ -492,7 +493,7 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { } func TestAlertingRuleQueryInTemplate(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 70 85 70 70 `) @@ -601,7 +602,7 @@ func TestAlertingRuleDuplicate(t *testing.T) { } func TestAlertingRuleLimit(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m metric{label="1"} 1 metric{label="2"} 1 @@ -783,7 +784,7 @@ func TestSendAlertsDontAffectActiveAlerts(t *testing.T) { } func TestKeepFiringFor(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 10x5 `) @@ -893,7 +894,7 @@ func TestKeepFiringFor(t *testing.T) { } func TestPendingAndKeepFiringFor(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 75 10x10 `) diff --git a/rules/manager_test.go b/rules/manager_test.go index 07159145f..665b1379a 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -38,6 +38,7 @@ import ( "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/tsdbutil" @@ -50,7 +51,7 @@ func TestMain(m *testing.M) { } func TestAlertingRule(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 5m http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140 @@ -190,7 +191,7 @@ func TestAlertingRule(t *testing.T) { } func TestForStateAddSamples(t *testing.T) { - storage := 
promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 5m http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140 @@ -347,7 +348,7 @@ func sortAlerts(items []*Alert) { } func TestForStateRestore(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 5m http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 50 0 0 25 0 0 40 0 120 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 125 90 60 0 0 25 0 0 40 0 130 @@ -1229,7 +1230,7 @@ func TestRuleHealthUpdates(t *testing.T) { } func TestRuleGroupEvalIterationFunc(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 5m http_requests{instance="0"} 75 85 50 0 0 25 0 0 40 0 120 `) diff --git a/rules/recording_test.go b/rules/recording_test.go index 24b7d6539..49f37b1ac 100644 --- a/rules/recording_test.go +++ b/rules/recording_test.go @@ -24,6 +24,7 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" ) @@ -111,7 +112,7 @@ var ruleEvalTestScenarios = []struct { } func setUpRuleEvalTest(t require.TestingT) *teststorage.TestStorage { - return promql.LoadedStorage(t, ` + return promqltest.LoadedStorage(t, ` load 1m metric{label_a="1",label_b="3"} 1 metric{label_a="2",label_b="4"} 10 @@ -178,7 +179,7 @@ func TestRuleEvalDuplicate(t *testing.T) { } func TestRecordingRuleLimit(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m metric{label="1"} 1 metric{label="2"} 1 diff --git a/storage/remote/read_handler_test.go b/storage/remote/read_handler_test.go index e8e0ecb8d..452b29221 100644 --- a/storage/remote/read_handler_test.go +++ b/storage/remote/read_handler_test.go @@ -30,14 +30,14 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/prompb" - "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/teststorage" ) func TestSampledReadEndpoint(t *testing.T) { - store := promql.LoadedStorage(t, ` + store := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar",baz="qux"} 1 `) @@ -132,7 +132,7 @@ func TestSampledReadEndpoint(t *testing.T) { } func BenchmarkStreamReadEndpoint(b *testing.B) { - store := promql.LoadedStorage(b, ` + store := promqltest.LoadedStorage(b, ` load 1m test_metric1{foo="bar1",baz="qux"} 0+100x119 test_metric1{foo="bar2",baz="qux"} 0+100x120 @@ -200,7 +200,7 @@ func TestStreamReadEndpoint(t *testing.T) { // Second with 121 float samples, We expect 1 frame with 2 chunks. // Third with 241 float samples. We expect 1 frame with 2 chunks, and 1 frame with 1 chunk for the same series due to bytes limit. // Fourth with 25 histogram samples. We expect 1 frame with 1 chunk. 
- store := promql.LoadedStorage(t, ` + store := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar1",baz="qux"} 0+100x119 test_metric1{foo="bar2",baz="qux"} 0+100x120 diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index bb2a73f6d..9d7d1d502 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -49,6 +49,7 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/storage" @@ -338,7 +339,7 @@ var sampleFlagMap = map[string]string{ } func TestEndpoints(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar"} 0+100x100 test_metric1{foo="boo"} 1+0x100 @@ -502,7 +503,7 @@ func (b byLabels) Less(i, j int) bool { return labels.Compare(b[i], b[j]) < 0 } func TestGetSeries(t *testing.T) { // TestEndpoints doesn't have enough label names to test api.labelNames // endpoint properly. Hence we test it separately. - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo1="bar", baz="abc"} 0+100x100 test_metric1{foo2="boo"} 1+0x100 @@ -606,7 +607,7 @@ func TestGetSeries(t *testing.T) { func TestQueryExemplars(t *testing.T) { start := time.Unix(0, 0) - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar"} 0+100x100 test_metric1{foo="boo"} 1+0x100 @@ -725,7 +726,7 @@ func TestQueryExemplars(t *testing.T) { func TestLabelNames(t *testing.T) { // TestEndpoints doesn't have enough label names to test api.labelNames // endpoint properly. Hence we test it separately. - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo1="bar", baz="abc"} 0+100x100 test_metric1{foo2="boo"} 1+0x100 @@ -3835,7 +3836,7 @@ func TestExtractQueryOpts(t *testing.T) { // Test query timeout parameter. 
func TestQueryTimeout(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar"} 0+100x100 `) diff --git a/web/federate_test.go b/web/federate_test.go index f201210ec..e5adb97eb 100644 --- a/web/federate_test.go +++ b/web/federate_test.go @@ -34,6 +34,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/textparse" "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/util/teststorage" @@ -201,7 +202,7 @@ test_metric_without_labels{instance="baz"} 1001 6000000 } func TestFederation(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar",instance="i"} 0+100x100 test_metric1{foo="boo",instance="i"} 1+0x100 From 4a72607c4a59d51042a282d02b930e355b4ac033 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 29 Apr 2024 12:39:49 +0100 Subject: [PATCH 069/112] refactor: extract some PromQL Engine tests which use unexported structs Signed-off-by: Bryan Boreham --- promql/engine_internal_test.go | 82 ++++++++++++++++++++++++++++++++++ promql/engine_test.go | 59 ------------------------ 2 files changed, 82 insertions(+), 59 deletions(-) create mode 100644 promql/engine_internal_test.go diff --git a/promql/engine_internal_test.go b/promql/engine_internal_test.go new file mode 100644 index 000000000..cb501b2fd --- /dev/null +++ b/promql/engine_internal_test.go @@ -0,0 +1,82 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql + +import ( + "errors" + "testing" + + "github.com/go-kit/log" + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/util/annotations" +) + +func TestRecoverEvaluatorRuntime(t *testing.T) { + var output []interface{} + logger := log.Logger(log.LoggerFunc(func(keyvals ...interface{}) error { + output = append(output, keyvals...) + return nil + })) + ev := &evaluator{logger: logger} + + expr, _ := parser.ParseExpr("sum(up)") + + var err error + + defer func() { + require.EqualError(t, err, "unexpected error: runtime error: index out of range [123] with length 0") + require.Contains(t, output, "sum(up)") + }() + defer ev.recover(expr, nil, &err) + + // Cause a runtime panic. 
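+	// Writing to index 123 of a nil slice panics at runtime; the deferred
+	// ev.recover above turns that panic into the error asserted earlier.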
+ var a []int + a[123] = 1 +} + +func TestRecoverEvaluatorError(t *testing.T) { + ev := &evaluator{logger: log.NewNopLogger()} + var err error + + e := errors.New("custom error") + + defer func() { + require.EqualError(t, err, e.Error()) + }() + defer ev.recover(nil, nil, &err) + + panic(e) +} + +func TestRecoverEvaluatorErrorWithWarnings(t *testing.T) { + ev := &evaluator{logger: log.NewNopLogger()} + var err error + var ws annotations.Annotations + + warnings := annotations.New().Add(errors.New("custom warning")) + e := errWithWarnings{ + err: errors.New("custom error"), + warnings: warnings, + } + + defer func() { + require.EqualError(t, err, e.Error()) + require.Equal(t, warnings, ws, "wrong warning message") + }() + defer ev.recover(nil, &ws, &err) + + panic(e) +} diff --git a/promql/engine_test.go b/promql/engine_test.go index 1c2722a1d..826531789 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -23,8 +23,6 @@ import ( "testing" "time" - "github.com/go-kit/log" - "github.com/stretchr/testify/require" "go.uber.org/goleak" @@ -1714,63 +1712,6 @@ load 1ms } } -func TestRecoverEvaluatorRuntime(t *testing.T) { - var output []interface{} - logger := log.Logger(log.LoggerFunc(func(keyvals ...interface{}) error { - output = append(output, keyvals...) - return nil - })) - ev := &evaluator{logger: logger} - - expr, _ := parser.ParseExpr("sum(up)") - - var err error - - defer func() { - require.EqualError(t, err, "unexpected error: runtime error: index out of range [123] with length 0") - require.Contains(t, output, "sum(up)") - }() - defer ev.recover(expr, nil, &err) - - // Cause a runtime panic. - var a []int - a[123] = 1 -} - -func TestRecoverEvaluatorError(t *testing.T) { - ev := &evaluator{logger: log.NewNopLogger()} - var err error - - e := errors.New("custom error") - - defer func() { - require.EqualError(t, err, e.Error()) - }() - defer ev.recover(nil, nil, &err) - - panic(e) -} - -func TestRecoverEvaluatorErrorWithWarnings(t *testing.T) { - ev := &evaluator{logger: log.NewNopLogger()} - var err error - var ws annotations.Annotations - - warnings := annotations.New().Add(errors.New("custom warning")) - e := errWithWarnings{ - err: errors.New("custom error"), - warnings: warnings, - } - - defer func() { - require.EqualError(t, err, e.Error()) - require.Equal(t, warnings, ws, "wrong warning message") - }() - defer ev.recover(nil, &ws, &err) - - panic(e) -} - func TestSubquerySelector(t *testing.T) { type caseType struct { Query string From 0dbfd20b695a8dfa0ddc23e76fda69b6ffcbc4bb Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 29 Apr 2024 13:14:18 +0100 Subject: [PATCH 070/112] test: move most PromQL tests into separate test package So that they can import promqltest which imports promql. Signed-off-by: Bryan Boreham --- promql/bench_test.go | 13 +- promql/engine_test.go | 569 ++++++++++++++++++++------------------- promql/functions_test.go | 11 +- promql/promql_test.go | 9 +- promql/value_test.go | 27 +- 5 files changed, 331 insertions(+), 298 deletions(-) diff --git a/promql/bench_test.go b/promql/bench_test.go index 516b0d748..9a8529091 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package promql +package promql_test import ( "context" @@ -23,13 +23,14 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/teststorage" ) -func setupRangeQueryTestData(stor *teststorage.TestStorage, _ *Engine, interval, numIntervals int) error { +func setupRangeQueryTestData(stor *teststorage.TestStorage, _ *promql.Engine, interval, numIntervals int) error { ctx := context.Background() metrics := []labels.Labels{} @@ -249,13 +250,13 @@ func BenchmarkRangeQuery(b *testing.B) { stor := teststorage.New(b) stor.DB.DisableCompactions() // Don't want auto-compaction disrupting timings. defer stor.Close() - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 50000000, Timeout: 100 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) const interval = 10000 // 10s interval. // A day of data plus 10k steps. @@ -324,7 +325,7 @@ func BenchmarkNativeHistograms(b *testing.B) { }, } - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 50000000, @@ -338,7 +339,7 @@ func BenchmarkNativeHistograms(b *testing.B) { for _, tc := range cases { b.Run(tc.name, func(b *testing.B) { - ng := NewEngine(opts) + ng := promql.NewEngine(opts) for i := 0; i < b.N; i++ { qry, err := ng.NewRangeQuery(context.Background(), testStorage, nil, tc.query, start, end, step) if err != nil { diff --git a/promql/engine_test.go b/promql/engine_test.go index 826531789..5ca110824 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package promql +package promql_test import ( "context" @@ -29,8 +29,10 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/almost" @@ -40,6 +42,12 @@ import ( "github.com/prometheus/prometheus/util/testutil" ) +const ( + env = "query execution" + defaultLookbackDelta = 5 * time.Minute + defaultEpsilon = 0.000001 // Relative error allowed for sample values. 
+) + func TestMain(m *testing.M) { goleak.VerifyTestMain(m) } @@ -50,10 +58,10 @@ func TestQueryConcurrency(t *testing.T) { dir, err := os.MkdirTemp("", "test_concurrency") require.NoError(t, err) defer os.RemoveAll(dir) - queryTracker := NewActiveQueryTracker(dir, maxConcurrency, nil) + queryTracker := promql.NewActiveQueryTracker(dir, maxConcurrency, nil) t.Cleanup(queryTracker.Close) - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, @@ -61,7 +69,7 @@ func TestQueryConcurrency(t *testing.T) { ActiveQueryTracker: queryTracker, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() @@ -84,7 +92,7 @@ func TestQueryConcurrency(t *testing.T) { } for i := 0; i < maxConcurrency; i++ { - q := engine.newTestQuery(f) + q := engine.NewTestQuery(f) go q.Exec(ctx) select { case <-processing: @@ -94,7 +102,7 @@ func TestQueryConcurrency(t *testing.T) { } } - q := engine.newTestQuery(f) + q := engine.NewTestQuery(f) go q.Exec(ctx) select { @@ -120,18 +128,33 @@ func TestQueryConcurrency(t *testing.T) { } } +// contextDone returns an error if the context was canceled or timed out. +func contextDone(ctx context.Context, env string) error { + if err := ctx.Err(); err != nil { + switch { + case errors.Is(err, context.Canceled): + return promql.ErrQueryCanceled(env) + case errors.Is(err, context.DeadlineExceeded): + return promql.ErrQueryTimeout(env) + default: + return err + } + } + return nil +} + func TestQueryTimeout(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 5 * time.Millisecond, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() - query := engine.newTestQuery(func(ctx context.Context) error { + query := engine.NewTestQuery(func(ctx context.Context) error { time.Sleep(100 * time.Millisecond) return contextDone(ctx, "test statement execution") }) @@ -139,20 +162,20 @@ func TestQueryTimeout(t *testing.T) { res := query.Exec(ctx) require.Error(t, res.Err, "expected timeout error but got none") - var e ErrQueryTimeout + var e promql.ErrQueryTimeout require.ErrorAs(t, res.Err, &e, "expected timeout error but got: %s", res.Err) } -const errQueryCanceled = ErrQueryCanceled("test statement execution") +const errQueryCanceled = promql.ErrQueryCanceled("test statement execution") func TestQueryCancel(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 10 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() @@ -160,13 +183,13 @@ func TestQueryCancel(t *testing.T) { block := make(chan struct{}) processing := make(chan struct{}) - query1 := engine.newTestQuery(func(ctx context.Context) error { + query1 := engine.NewTestQuery(func(ctx context.Context) error { processing <- struct{}{} <-block return contextDone(ctx, "test statement execution") }) - var res *Result + var res *promql.Result go func() { res = query1.Exec(ctx) @@ -182,7 +205,7 @@ func TestQueryCancel(t *testing.T) { require.Equal(t, errQueryCanceled, res.Err) // Canceling a query before starting it must have no effect. 
- query2 := engine.newTestQuery(func(ctx context.Context) error { + query2 := engine.NewTestQuery(func(ctx context.Context) error { return contextDone(ctx, "test statement execution") }) @@ -220,14 +243,14 @@ func (e errSeriesSet) Err() error { return e.err } func (e errSeriesSet) Warnings() annotations.Annotations { return nil } func TestQueryError(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 10 * time.Second, } - engine := NewEngine(opts) - errStorage := ErrStorage{errors.New("storage error")} + engine := promql.NewEngine(opts) + errStorage := promql.ErrStorage{errors.New("storage error")} queryable := storage.QueryableFunc(func(mint, maxt int64) (storage.Querier, error) { return &errQuerier{err: errStorage}, nil }) @@ -269,7 +292,7 @@ func (h *hintRecordingQuerier) Select(ctx context.Context, sortSeries bool, hint } func TestSelectHintsSetCorrectly(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, @@ -560,11 +583,11 @@ func TestSelectHintsSetCorrectly(t *testing.T) { }, } { t.Run(tc.query, func(t *testing.T) { - engine := NewEngine(opts) + engine := promql.NewEngine(opts) hintsRecorder := &noopHintRecordingQueryable{} var ( - query Query + query promql.Query err error ) ctx := context.Background() @@ -585,13 +608,13 @@ func TestSelectHintsSetCorrectly(t *testing.T) { } func TestEngineShutdown(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 10 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) ctx, cancelCtx := context.WithCancel(context.Background()) block := make(chan struct{}) @@ -604,13 +627,13 @@ func TestEngineShutdown(t *testing.T) { <-block return contextDone(ctx, "test statement execution") } - query1 := engine.newTestQuery(f) + query1 := engine.NewTestQuery(f) // Stopping the engine must cancel the base context. While executing queries is // still possible, their context is canceled from the beginning and execution should // terminate immediately. - var res *Result + var res *promql.Result go func() { res = query1.Exec(ctx) processing <- struct{}{} @@ -624,7 +647,7 @@ func TestEngineShutdown(t *testing.T) { require.Error(t, res.Err, "expected error on shutdown during query but got none") require.Equal(t, errQueryCanceled, res.Err) - query2 := engine.newTestQuery(func(context.Context) error { + query2 := engine.NewTestQuery(func(context.Context) error { require.FailNow(t, "reached query execution unexpectedly") return nil }) @@ -634,12 +657,12 @@ func TestEngineShutdown(t *testing.T) { res2 := query2.Exec(ctx) require.Error(t, res2.Err, "expected error on querying with canceled context but got none") - var e ErrQueryCanceled + var e promql.ErrQueryCanceled require.ErrorAs(t, res2.Err, &e, "expected cancellation error but got: %s", res2.Err) } func TestEngineEvalStmtTimestamps(t *testing.T) { - storage := LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 10s metric 1 2 `) @@ -656,13 +679,13 @@ load 10s // Instant queries. 
{ Query: "1", - Result: Scalar{V: 1, T: 1000}, + Result: promql.Scalar{V: 1, T: 1000}, Start: time.Unix(1, 0), }, { Query: "metric", - Result: Vector{ - Sample{ + Result: promql.Vector{ + promql.Sample{ F: 1, T: 1000, Metric: labels.FromStrings("__name__", "metric"), @@ -672,9 +695,9 @@ load 10s }, { Query: "metric[20s]", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 2, T: 10000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 2, T: 10000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -683,9 +706,9 @@ load 10s // Range queries. { Query: "1", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 1, T: 1000}, {F: 1, T: 2000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 1000}, {F: 1, T: 2000}}, Metric: labels.EmptyLabels(), }, }, @@ -695,9 +718,9 @@ load 10s }, { Query: "metric", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 1, T: 1000}, {F: 1, T: 2000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 1000}, {F: 1, T: 2000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -707,9 +730,9 @@ load 10s }, { Query: "metric", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -726,7 +749,7 @@ load 10s for i, c := range cases { t.Run(fmt.Sprintf("%d query=%s", i, c.Query), func(t *testing.T) { var err error - var qry Query + var qry promql.Query engine := newTestEngine() if c.Interval == 0 { qry, err = engine.NewInstantQuery(context.Background(), storage, nil, c.Query, c.Start) @@ -748,7 +771,7 @@ load 10s } func TestQueryStatistics(t *testing.T) { - storage := LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 10s metricWith1SampleEvery10Seconds 1+1x100 metricWith3SampleEvery10Seconds{a="1",b="1"} 1+1x100 @@ -1271,12 +1294,12 @@ load 10s origMaxSamples := engine.maxSamplesPerQuery for _, c := range cases { t.Run(c.Query, func(t *testing.T) { - opts := NewPrometheusQueryOpts(true, 0) + opts := promql.NewPrometheusQueryOpts(true, 0) engine.maxSamplesPerQuery = origMaxSamples runQuery := func(expErr error) *stats.Statistics { var err error - var qry Query + var qry promql.Query if c.Interval == 0 { qry, err = engine.NewInstantQuery(context.Background(), storage, opts, c.Query, c.Start) } else { @@ -1300,13 +1323,13 @@ load 10s return } engine.maxSamplesPerQuery = stats.Samples.PeakSamples - 1 - runQuery(ErrTooManySamples(env)) + runQuery(promql.ErrTooManySamples(env)) }) } } func TestMaxQuerySamples(t *testing.T) { - storage := LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 10s metric 1+1x100 bigmetric{a="1"} 1+1x100 @@ -1422,7 +1445,7 @@ load 10s Interval: 5 * time.Second, }, { - // Sample as above but with only 1 part as step invariant. + // promql.Sample as above but with only 1 part as step invariant. // Here the peak is caused by the non-step invariant part as it touches more time range. 
// Hence at peak it is 2*21 (subquery from 0s to 20s) // + 11 (buffer of a series per evaluation) @@ -1455,7 +1478,7 @@ load 10s engine := newTestEngine() testFunc := func(expError error) { var err error - var qry Query + var qry promql.Query if c.Interval == 0 { qry, err = engine.NewInstantQuery(context.Background(), storage, nil, c.Query, c.Start) } else { @@ -1478,14 +1501,14 @@ load 10s // Exceeding limit. engine.maxSamplesPerQuery = c.MaxSamples - 1 - testFunc(ErrTooManySamples(env)) + testFunc(promql.ErrTooManySamples(env)) }) } } func TestAtModifier(t *testing.T) { engine := newTestEngine() - storage := LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 10s metric{job="1"} 0+1x1000 metric{job="2"} 0+2x1000 @@ -1529,137 +1552,137 @@ load 1ms { // Time of the result is the evaluation time. query: `metric_neg @ 0`, start: 100, - result: Vector{ - Sample{F: 1, T: 100000, Metric: lblsneg}, + result: promql.Vector{ + promql.Sample{F: 1, T: 100000, Metric: lblsneg}, }, }, { query: `metric_neg @ -200`, start: 100, - result: Vector{ - Sample{F: 201, T: 100000, Metric: lblsneg}, + result: promql.Vector{ + promql.Sample{F: 201, T: 100000, Metric: lblsneg}, }, }, { query: `metric{job="2"} @ 50`, start: -2, end: 2, interval: 1, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 10, T: -2000}, {F: 10, T: -1000}, {F: 10, T: 0}, {F: 10, T: 1000}, {F: 10, T: 2000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 10, T: -2000}, {F: 10, T: -1000}, {F: 10, T: 0}, {F: 10, T: 1000}, {F: 10, T: 2000}}, Metric: lbls2, }, }, }, { // Timestamps for matrix selector does not depend on the evaluation time. query: "metric[20s] @ 300", start: 10, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 28, T: 280000}, {F: 29, T: 290000}, {F: 30, T: 300000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 28, T: 280000}, {F: 29, T: 290000}, {F: 30, T: 300000}}, Metric: lbls1, }, - Series{ - Floats: []FPoint{{F: 56, T: 280000}, {F: 58, T: 290000}, {F: 60, T: 300000}}, + promql.Series{ + Floats: []promql.FPoint{{F: 56, T: 280000}, {F: 58, T: 290000}, {F: 60, T: 300000}}, Metric: lbls2, }, }, }, { query: `metric_neg[2s] @ 0`, start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 3, T: -2000}, {F: 2, T: -1000}, {F: 1, T: 0}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 3, T: -2000}, {F: 2, T: -1000}, {F: 1, T: 0}}, Metric: lblsneg, }, }, }, { query: `metric_neg[3s] @ -500`, start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 504, T: -503000}, {F: 503, T: -502000}, {F: 502, T: -501000}, {F: 501, T: -500000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 504, T: -503000}, {F: 503, T: -502000}, {F: 502, T: -501000}, {F: 501, T: -500000}}, Metric: lblsneg, }, }, }, { query: `metric_ms[3ms] @ 2.345`, start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 2342, T: 2342}, {F: 2343, T: 2343}, {F: 2344, T: 2344}, {F: 2345, T: 2345}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 2342, T: 2342}, {F: 2343, T: 2343}, {F: 2344, T: 2344}, {F: 2345, T: 2345}}, Metric: lblsms, }, }, }, { query: "metric[100s:25s] @ 300", start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 20, T: 200000}, {F: 22, T: 225000}, {F: 25, T: 250000}, {F: 27, T: 275000}, {F: 30, T: 300000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 20, T: 200000}, {F: 22, T: 225000}, {F: 25, T: 250000}, {F: 27, T: 275000}, {F: 30, T: 300000}}, Metric: lbls1, }, - 
Series{ - Floats: []FPoint{{F: 40, T: 200000}, {F: 44, T: 225000}, {F: 50, T: 250000}, {F: 54, T: 275000}, {F: 60, T: 300000}}, + promql.Series{ + Floats: []promql.FPoint{{F: 40, T: 200000}, {F: 44, T: 225000}, {F: 50, T: 250000}, {F: 54, T: 275000}, {F: 60, T: 300000}}, Metric: lbls2, }, }, }, { query: "metric_neg[50s:25s] @ 0", start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 51, T: -50000}, {F: 26, T: -25000}, {F: 1, T: 0}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 51, T: -50000}, {F: 26, T: -25000}, {F: 1, T: 0}}, Metric: lblsneg, }, }, }, { query: "metric_neg[50s:25s] @ -100", start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 151, T: -150000}, {F: 126, T: -125000}, {F: 101, T: -100000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 151, T: -150000}, {F: 126, T: -125000}, {F: 101, T: -100000}}, Metric: lblsneg, }, }, }, { query: `metric_ms[100ms:25ms] @ 2.345`, start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 2250, T: 2250}, {F: 2275, T: 2275}, {F: 2300, T: 2300}, {F: 2325, T: 2325}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 2250, T: 2250}, {F: 2275, T: 2275}, {F: 2300, T: 2300}, {F: 2325, T: 2325}}, Metric: lblsms, }, }, }, { query: `metric_topk and topk(1, sum_over_time(metric_topk[50s] @ 100))`, start: 50, end: 80, interval: 10, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 995, T: 50000}, {F: 994, T: 60000}, {F: 993, T: 70000}, {F: 992, T: 80000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 995, T: 50000}, {F: 994, T: 60000}, {F: 993, T: 70000}, {F: 992, T: 80000}}, Metric: lblstopk3, }, }, }, { query: `metric_topk and topk(1, sum_over_time(metric_topk[50s] @ 5000))`, start: 50, end: 80, interval: 10, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 10, T: 50000}, {F: 12, T: 60000}, {F: 14, T: 70000}, {F: 16, T: 80000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 10, T: 50000}, {F: 12, T: 60000}, {F: 14, T: 70000}, {F: 16, T: 80000}}, Metric: lblstopk2, }, }, }, { query: `metric_topk and topk(1, sum_over_time(metric_topk[50s] @ end()))`, start: 70, end: 100, interval: 10, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 993, T: 70000}, {F: 992, T: 80000}, {F: 991, T: 90000}, {F: 990, T: 100000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 993, T: 70000}, {F: 992, T: 80000}, {F: 991, T: 90000}, {F: 990, T: 100000}}, Metric: lblstopk3, }, }, }, { query: `metric_topk and topk(1, sum_over_time(metric_topk[50s] @ start()))`, start: 100, end: 130, interval: 10, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 990, T: 100000}, {F: 989, T: 110000}, {F: 988, T: 120000}, {F: 987, T: 130000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 990, T: 100000}, {F: 989, T: 110000}, {F: 988, T: 120000}, {F: 987, T: 130000}}, Metric: lblstopk3, }, }, @@ -1668,9 +1691,9 @@ load 1ms // The trick here is that the query range should be > lookback delta. 
query: `timestamp(metric_timestamp @ 3600)`, start: 0, end: 7 * 60, interval: 60, - result: Matrix{ - Series{ - Floats: []FPoint{ + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{ {F: 3600, T: 0}, {F: 3600, T: 60 * 1000}, {F: 3600, T: 2 * 60 * 1000}, @@ -1693,7 +1716,7 @@ load 1ms } start, end, interval := time.Unix(c.start, 0), time.Unix(c.end, 0), time.Duration(c.interval)*time.Second var err error - var qry Query + var qry promql.Query if c.end == 0 { qry, err = engine.NewInstantQuery(context.Background(), storage, nil, c.query, start) } else { @@ -1703,9 +1726,9 @@ load 1ms res := qry.Exec(context.Background()) require.NoError(t, res.Err) - if expMat, ok := c.result.(Matrix); ok { + if expMat, ok := c.result.(promql.Matrix); ok { sort.Sort(expMat) - sort.Sort(res.Value.(Matrix)) + sort.Sort(res.Value.(promql.Matrix)) } testutil.RequireEqual(t, c.result, res.Value, "query %q failed", c.query) }) @@ -1715,7 +1738,7 @@ load 1ms func TestSubquerySelector(t *testing.T) { type caseType struct { Query string - Result Result + Result promql.Result Start time.Time } @@ -1729,11 +1752,11 @@ func TestSubquerySelector(t *testing.T) { cases: []caseType{ { Query: "metric[20s:10s]", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 2, T: 10000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 2, T: 10000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1743,11 +1766,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s]", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1757,11 +1780,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s] offset 2s", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1771,11 +1794,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s] offset 6s", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1785,11 +1808,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s] offset 4s", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1799,11 +1822,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s] offset 5s", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 
2, T: 25000}, {F: 2, T: 30000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1813,11 +1836,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s] offset 6s", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1827,11 +1850,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s] offset 7s", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1850,11 +1873,11 @@ func TestSubquerySelector(t *testing.T) { cases: []caseType{ { // Normal selector. Query: `http_requests{group=~"pro.*",instance="0"}[30s:10s]`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 9990, T: 9990000}, {F: 10000, T: 10000000}, {F: 100, T: 10010000}, {F: 130, T: 10020000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 9990, T: 9990000}, {F: 10000, T: 10000000}, {F: 100, T: 10010000}, {F: 130, T: 10020000}}, Metric: labels.FromStrings("__name__", "http_requests", "job", "api-server", "instance", "0", "group", "production"), }, }, @@ -1864,11 +1887,11 @@ func TestSubquerySelector(t *testing.T) { }, { // Default step. Query: `http_requests{group=~"pro.*",instance="0"}[5m:]`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 9840, T: 9840000}, {F: 9900, T: 9900000}, {F: 9960, T: 9960000}, {F: 130, T: 10020000}, {F: 310, T: 10080000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 9840, T: 9840000}, {F: 9900, T: 9900000}, {F: 9960, T: 9960000}, {F: 130, T: 10020000}, {F: 310, T: 10080000}}, Metric: labels.FromStrings("__name__", "http_requests", "job", "api-server", "instance", "0", "group", "production"), }, }, @@ -1878,11 +1901,11 @@ func TestSubquerySelector(t *testing.T) { }, { // Checking if high offset (>LookbackDelta) is being taken care of. 
Query: `http_requests{group=~"pro.*",instance="0"}[5m:] offset 20m`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 8640, T: 8640000}, {F: 8700, T: 8700000}, {F: 8760, T: 8760000}, {F: 8820, T: 8820000}, {F: 8880, T: 8880000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 8640, T: 8640000}, {F: 8700, T: 8700000}, {F: 8760, T: 8760000}, {F: 8820, T: 8820000}, {F: 8880, T: 8880000}}, Metric: labels.FromStrings("__name__", "http_requests", "job", "api-server", "instance", "0", "group", "production"), }, }, @@ -1892,23 +1915,23 @@ func TestSubquerySelector(t *testing.T) { }, { Query: `rate(http_requests[1m])[15s:5s]`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 3, T: 7985000}, {F: 3, T: 7990000}, {F: 3, T: 7995000}, {F: 3, T: 8000000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 3, T: 7985000}, {F: 3, T: 7990000}, {F: 3, T: 7995000}, {F: 3, T: 8000000}}, Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "canary"), }, - Series{ - Floats: []FPoint{{F: 4, T: 7985000}, {F: 4, T: 7990000}, {F: 4, T: 7995000}, {F: 4, T: 8000000}}, + promql.Series{ + Floats: []promql.FPoint{{F: 4, T: 7985000}, {F: 4, T: 7990000}, {F: 4, T: 7995000}, {F: 4, T: 8000000}}, Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "canary"), }, - Series{ - Floats: []FPoint{{F: 1, T: 7985000}, {F: 1, T: 7990000}, {F: 1, T: 7995000}, {F: 1, T: 8000000}}, + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 7985000}, {F: 1, T: 7990000}, {F: 1, T: 7995000}, {F: 1, T: 8000000}}, Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "production"), }, - Series{ - Floats: []FPoint{{F: 2, T: 7985000}, {F: 2, T: 7990000}, {F: 2, T: 7995000}, {F: 2, T: 8000000}}, + promql.Series{ + Floats: []promql.FPoint{{F: 2, T: 7985000}, {F: 2, T: 7990000}, {F: 2, T: 7995000}, {F: 2, T: 8000000}}, Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "production"), }, }, @@ -1918,11 +1941,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: `sum(http_requests{group=~"pro.*"})[30s:10s]`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 270, T: 90000}, {F: 300, T: 100000}, {F: 330, T: 110000}, {F: 360, T: 120000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 270, T: 90000}, {F: 300, T: 100000}, {F: 330, T: 110000}, {F: 360, T: 120000}}, Metric: labels.EmptyLabels(), }, }, @@ -1932,11 +1955,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: `sum(http_requests)[40s:10s]`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 800, T: 80000}, {F: 900, T: 90000}, {F: 1000, T: 100000}, {F: 1100, T: 110000}, {F: 1200, T: 120000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 800, T: 80000}, {F: 900, T: 90000}, {F: 1000, T: 100000}, {F: 1100, T: 110000}, {F: 1200, T: 120000}}, Metric: labels.EmptyLabels(), }, }, @@ -1946,11 +1969,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: `(sum(http_requests{group=~"p.*"})+sum(http_requests{group=~"c.*"}))[20s:5s]`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 1000, T: 100000}, {F: 1000, T: 105000}, {F: 1100, T: 110000}, {F: 1100, T: 115000}, {F: 1200, T: 120000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1000, T: 100000}, {F: 1000, T: 105000}, {F: 1100, T: 110000}, {F: 1100, T: 115000}, {F: 1200, 
T: 120000}}, Metric: labels.EmptyLabels(), }, }, @@ -1963,7 +1986,7 @@ func TestSubquerySelector(t *testing.T) { } { t.Run("", func(t *testing.T) { engine := newTestEngine() - storage := LoadedStorage(t, tst.loadString) + storage := promqltest.LoadedStorage(t, tst.loadString) t.Cleanup(func() { storage.Close() }) for _, c := range tst.cases { @@ -1973,7 +1996,7 @@ func TestSubquerySelector(t *testing.T) { res := qry.Exec(context.Background()) require.Equal(t, c.Result.Err, res.Err) - mat := res.Value.(Matrix) + mat := res.Value.(promql.Matrix) sort.Sort(mat) testutil.RequireEqual(t, c.Result.Value, mat) }) @@ -1984,7 +2007,7 @@ func TestSubquerySelector(t *testing.T) { func TestTimestampFunction_StepsMoreOftenThanSamples(t *testing.T) { engine := newTestEngine() - storage := LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m metric 0+1x1000 `) @@ -1996,20 +2019,20 @@ load 1m interval := time.Second // We expect the value to be 0 for t=0s to t=59s (inclusive), then 60 for t=60s and t=61s. - expectedPoints := []FPoint{} + expectedPoints := []promql.FPoint{} for t := 0; t <= 59; t++ { - expectedPoints = append(expectedPoints, FPoint{F: 0, T: int64(t * 1000)}) + expectedPoints = append(expectedPoints, promql.FPoint{F: 0, T: int64(t * 1000)}) } expectedPoints = append( expectedPoints, - FPoint{F: 60, T: 60_000}, - FPoint{F: 60, T: 61_000}, + promql.FPoint{F: 60, T: 60_000}, + promql.FPoint{F: 60, T: 61_000}, ) - expectedResult := Matrix{ - Series{ + expectedResult := promql.Matrix{ + promql.Series{ Floats: expectedPoints, Metric: labels.EmptyLabels(), }, @@ -2046,25 +2069,25 @@ func (f *FakeQueryLogger) Log(l ...interface{}) error { } func TestQueryLogger_basic(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 10 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) queryExec := func() { ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() - query := engine.newTestQuery(func(ctx context.Context) error { + query := engine.NewTestQuery(func(ctx context.Context) error { return contextDone(ctx, "test statement execution") }) res := query.Exec(ctx) require.NoError(t, res.Err) } - // Query works without query log initialized. + // promql.Query works without query log initialized. 
queryExec() f1 := NewFakeQueryLogger() @@ -2097,21 +2120,21 @@ func TestQueryLogger_basic(t *testing.T) { } func TestQueryLogger_fields(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 10 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) f1 := NewFakeQueryLogger() engine.SetQueryLogger(f1) ctx, cancelCtx := context.WithCancel(context.Background()) - ctx = NewOriginContext(ctx, map[string]interface{}{"foo": "bar"}) + ctx = promql.NewOriginContext(ctx, map[string]interface{}{"foo": "bar"}) defer cancelCtx() - query := engine.newTestQuery(func(ctx context.Context) error { + query := engine.NewTestQuery(func(ctx context.Context) error { return contextDone(ctx, "test statement execution") }) @@ -2126,22 +2149,22 @@ func TestQueryLogger_fields(t *testing.T) { } func TestQueryLogger_error(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 10 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) f1 := NewFakeQueryLogger() engine.SetQueryLogger(f1) ctx, cancelCtx := context.WithCancel(context.Background()) - ctx = NewOriginContext(ctx, map[string]interface{}{"foo": "bar"}) + ctx = promql.NewOriginContext(ctx, map[string]interface{}{"foo": "bar"}) defer cancelCtx() testErr := errors.New("failure") - query := engine.newTestQuery(func(ctx context.Context) error { + query := engine.NewTestQuery(func(ctx context.Context) error { return testErr }) @@ -2947,7 +2970,7 @@ func TestPreprocessAndWrapWithStepInvariantExpr(t *testing.T) { t.Run(test.input, func(t *testing.T) { expr, err := parser.ParseExpr(test.input) require.NoError(t, err) - expr = PreprocessExpr(expr, startTime, endTime) + expr = promql.PreprocessExpr(expr, startTime, endTime) if test.outputTest { require.Equal(t, test.input, expr.String(), "error on input '%s'", test.input) } @@ -2958,64 +2981,64 @@ func TestPreprocessAndWrapWithStepInvariantExpr(t *testing.T) { func TestEngineOptsValidation(t *testing.T) { cases := []struct { - opts EngineOpts + opts promql.EngineOpts query string fail bool expError error }{ { - opts: EngineOpts{EnableAtModifier: false}, - query: "metric @ 100", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "metric @ 100", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "rate(metric[1m] @ 100)", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "rate(metric[1m] @ 100)", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "rate(metric[1h:1m] @ 100)", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "rate(metric[1h:1m] @ 100)", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "metric @ start()", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "metric @ start()", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "rate(metric[1m] @ start())", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "rate(metric[1m] @ start())", fail: true, 
expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "rate(metric[1h:1m] @ start())", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "rate(metric[1h:1m] @ start())", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "metric @ end()", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "metric @ end()", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "rate(metric[1m] @ end())", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "rate(metric[1m] @ end())", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "rate(metric[1h:1m] @ end())", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "rate(metric[1h:1m] @ end())", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: true}, + opts: promql.EngineOpts{EnableAtModifier: true}, query: "metric @ 100", }, { - opts: EngineOpts{EnableAtModifier: true}, + opts: promql.EngineOpts{EnableAtModifier: true}, query: "rate(metric[1m] @ start())", }, { - opts: EngineOpts{EnableAtModifier: true}, + opts: promql.EngineOpts{EnableAtModifier: true}, query: "rate(metric[1h:1m] @ end())", }, { - opts: EngineOpts{EnableNegativeOffset: false}, - query: "metric offset -1s", fail: true, expError: ErrValidationNegativeOffsetDisabled, + opts: promql.EngineOpts{EnableNegativeOffset: false}, + query: "metric offset -1s", fail: true, expError: promql.ErrValidationNegativeOffsetDisabled, }, { - opts: EngineOpts{EnableNegativeOffset: true}, + opts: promql.EngineOpts{EnableNegativeOffset: true}, query: "metric offset -1s", }, { - opts: EngineOpts{EnableAtModifier: true, EnableNegativeOffset: true}, + opts: promql.EngineOpts{EnableAtModifier: true, EnableNegativeOffset: true}, query: "metric @ 100 offset -2m", }, { - opts: EngineOpts{EnableAtModifier: true, EnableNegativeOffset: true}, + opts: promql.EngineOpts{EnableAtModifier: true, EnableNegativeOffset: true}, query: "metric offset -2m @ 100", }, } for _, c := range cases { - eng := NewEngine(c.opts) + eng := promql.NewEngine(c.opts) _, err1 := eng.NewInstantQuery(context.Background(), nil, nil, c.query, time.Unix(10, 0)) _, err2 := eng.NewRangeQuery(context.Background(), nil, nil, c.query, time.Unix(0, 0), time.Unix(10, 0), time.Second) if c.fail { @@ -3043,9 +3066,9 @@ func TestRangeQuery(t *testing.T) { Load: `load 30s bar 0 1 10 100 1000`, Query: "sum_over_time(bar[30s])", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}}, Metric: labels.EmptyLabels(), }, }, @@ -3058,9 +3081,9 @@ func TestRangeQuery(t *testing.T) { Load: `load 30s bar 0 1 10 100 1000 0 0 0 0`, Query: "sum_over_time(bar[30s])", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}}, Metric: labels.EmptyLabels(), }, }, @@ -3073,9 +3096,9 @@ 
func TestRangeQuery(t *testing.T) { Load: `load 30s bar 0 1 10 100 1000 10000 100000 1000000 10000000`, Query: "sum_over_time(bar[30s])", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}, {F: 110000, T: 180000}, {F: 11000000, T: 240000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}, {F: 110000, T: 180000}, {F: 11000000, T: 240000}}, Metric: labels.EmptyLabels(), }, }, @@ -3088,9 +3111,9 @@ func TestRangeQuery(t *testing.T) { Load: `load 30s bar 5 17 42 2 7 905 51`, Query: "sum_over_time(bar[30s])", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 5, T: 0}, {F: 59, T: 60000}, {F: 9, T: 120000}, {F: 956, T: 180000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 5, T: 0}, {F: 59, T: 60000}, {F: 9, T: 120000}, {F: 956, T: 180000}}, Metric: labels.EmptyLabels(), }, }, @@ -3103,9 +3126,9 @@ func TestRangeQuery(t *testing.T) { Load: `load 30s metric 1+1x4`, Query: "metric", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -3118,9 +3141,9 @@ func TestRangeQuery(t *testing.T) { Load: `load 30s metric 1+1x8`, Query: "metric", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -3134,16 +3157,16 @@ func TestRangeQuery(t *testing.T) { foo{job="1"} 1+1x4 bar{job="2"} 1+1x4`, Query: `foo > 2 or bar`, - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}}, Metric: labels.FromStrings( "__name__", "bar", "job", "2", ), }, - Series{ - Floats: []FPoint{{F: 3, T: 60000}, {F: 5, T: 120000}}, + promql.Series{ + Floats: []promql.FPoint{{F: 3, T: 60000}, {F: 5, T: 120000}}, Metric: labels.FromStrings( "__name__", "foo", "job", "1", @@ -3159,9 +3182,9 @@ func TestRangeQuery(t *testing.T) { Load: `load 30s requests{job="1", __address__="bar"} 100`, Query: `requests * 2`, - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 200, T: 0}, {F: 200, T: 60000}, {F: 200, T: 120000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 200, T: 0}, {F: 200, T: 60000}, {F: 200, T: 120000}}, Metric: labels.FromStrings( "__address__", "bar", "job", "1", @@ -3176,7 +3199,7 @@ func TestRangeQuery(t *testing.T) { for _, c := range cases { t.Run(c.Name, func(t *testing.T) { engine := newTestEngine() - storage := LoadedStorage(t, c.Load) + storage := promqltest.LoadedStorage(t, c.Load) t.Cleanup(func() { storage.Close() }) qry, err := engine.NewRangeQuery(context.Background(), storage, nil, c.Query, c.Start, c.End, c.Interval) @@ -3244,7 +3267,7 @@ func TestNativeHistogramRate(t *testing.T) { require.Len(t, matrix, 1) require.Len(t, matrix[0].Histograms, 2) actualHistograms := matrix[0].Histograms - expectedHistograms := []HPoint{{ + expectedHistograms := []promql.HPoint{{ T: 300000, H: &histogram.FloatHistogram{ CounterResetHint: histogram.GaugeType, @@ -4408,7 +4431,7 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { } 
require.NoError(t, app.Commit()) - queryAndCheck := func(queryString string, ts int64, exp Vector) { + queryAndCheck := func(queryString string, ts int64, exp promql.Vector) { qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) require.NoError(t, err) @@ -4432,7 +4455,7 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { // sum(). queryString := fmt.Sprintf("sum(%s)", seriesName) - queryAndCheck(queryString, ts, []Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, ts, []promql.Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) queryString = `sum({idx="0"})` var annos annotations.Annotations @@ -4444,26 +4467,26 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { for idx := 1; idx < len(c.histograms); idx++ { queryString += fmt.Sprintf(` + ignoring(idx) %s{idx="%d"}`, seriesName, idx) } - queryAndCheck(queryString, ts, []Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, ts, []promql.Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) // count(). queryString = fmt.Sprintf("count(%s)", seriesName) - queryAndCheck(queryString, ts, []Sample{{T: ts, F: 4, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, ts, []promql.Sample{{T: ts, F: 4, Metric: labels.EmptyLabels()}}) // avg(). queryString = fmt.Sprintf("avg(%s)", seriesName) - queryAndCheck(queryString, ts, []Sample{{T: ts, H: &c.expectedAvg, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, ts, []promql.Sample{{T: ts, H: &c.expectedAvg, Metric: labels.EmptyLabels()}}) offset := int64(len(c.histograms) - 1) newTs := ts + offset*int64(time.Minute/time.Millisecond) // sum_over_time(). queryString = fmt.Sprintf("sum_over_time(%s[%dm:1m])", seriesNameOverTime, offset) - queryAndCheck(queryString, newTs, []Sample{{T: newTs, H: &c.expected, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, newTs, []promql.Sample{{T: newTs, H: &c.expected, Metric: labels.EmptyLabels()}}) // avg_over_time(). 
queryString = fmt.Sprintf("avg_over_time(%s[%dm:1m])", seriesNameOverTime, offset) - queryAndCheck(queryString, newTs, []Sample{{T: newTs, H: &c.expectedAvg, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, newTs, []promql.Sample{{T: newTs, H: &c.expectedAvg, Metric: labels.EmptyLabels()}}) }) idx0++ } @@ -4670,7 +4693,7 @@ func TestNativeHistogram_SubOperator(t *testing.T) { } require.NoError(t, app.Commit()) - queryAndCheck := func(queryString string, exp Vector) { + queryAndCheck := func(queryString string, exp promql.Vector) { qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) require.NoError(t, err) @@ -4698,7 +4721,7 @@ func TestNativeHistogram_SubOperator(t *testing.T) { for idx := 1; idx < len(c.histograms); idx++ { queryString += fmt.Sprintf(` - ignoring(idx) %s{idx="%d"}`, seriesName, idx) } - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) }) } idx0++ @@ -4828,7 +4851,7 @@ func TestNativeHistogram_MulDivOperator(t *testing.T) { require.NoError(t, err) require.NoError(t, app.Commit()) - queryAndCheck := func(queryString string, exp Vector) { + queryAndCheck := func(queryString string, exp promql.Vector) { qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) require.NoError(t, err) @@ -4843,27 +4866,27 @@ func TestNativeHistogram_MulDivOperator(t *testing.T) { // histogram * scalar. queryString := fmt.Sprintf(`%s * %f`, seriesName, c.scalar) - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) // scalar * histogram. queryString = fmt.Sprintf(`%f * %s`, c.scalar, seriesName) - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) // histogram * float. queryString = fmt.Sprintf(`%s * %s`, seriesName, floatSeriesName) - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) // float * histogram. queryString = fmt.Sprintf(`%s * %s`, floatSeriesName, seriesName) - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) // histogram / scalar. queryString = fmt.Sprintf(`%s / %f`, seriesName, c.scalar) - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels()}}) // histogram / float. 
queryString = fmt.Sprintf(`%s / %s`, seriesName, floatSeriesName) - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels()}}) }) idx0++ } @@ -4934,7 +4957,7 @@ metric 0 1 2 c := c t.Run(c.name, func(t *testing.T) { engine := newTestEngine() - storage := LoadedStorage(t, load) + storage := promqltest.LoadedStorage(t, load) t.Cleanup(func() { storage.Close() }) if c.engineLookback != 0 { @@ -4946,7 +4969,7 @@ metric 0 1 2 res := qry.Exec(context.Background()) require.NoError(t, res.Err) - vec, ok := res.Value.(Vector) + vec, ok := res.Value.(promql.Vector) require.True(t, ok) if c.expectSamples { require.NotEmpty(t, vec) @@ -4956,3 +4979,9 @@ metric 0 1 2 }) } } + +func makeInt64Pointer(val int64) *int64 { + valp := new(int64) + *valp = val + return valp +} diff --git a/promql/functions_test.go b/promql/functions_test.go index 08e4900f5..aef59c837 100644 --- a/promql/functions_test.go +++ b/promql/functions_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package promql +package promql_test import ( "context" @@ -22,6 +22,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/util/teststorage" ) @@ -32,13 +33,13 @@ func TestDeriv(t *testing.T) { // so we test it by hand. storage := teststorage.New(t) defer storage.Close() - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10000, Timeout: 10 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) a := storage.Appender(context.Background()) @@ -69,13 +70,13 @@ func TestDeriv(t *testing.T) { func TestFunctionList(t *testing.T) { // Test that Functions and parser.Functions list the same functions. - for i := range FunctionCalls { + for i := range promql.FunctionCalls { _, ok := parser.Functions[i] require.True(t, ok, "function %s exists in promql package, but not in parser package", i) } for i := range parser.Functions { - _, ok := FunctionCalls[i] + _, ok := promql.FunctionCalls[i] require.True(t, ok, "function %s exists in parser package, but not in promql package", i) } } diff --git a/promql/promql_test.go b/promql/promql_test.go index 87ec2cd4e..59ec4756e 100644 --- a/promql/promql_test.go +++ b/promql/promql_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package promql +package promql_test import ( "context" @@ -22,11 +22,12 @@ import ( "github.com/stretchr/testify/require" "golang.org/x/sync/errgroup" + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/util/teststorage" ) -func newTestEngine() *Engine { +func newTestEngine() *promql.Engine { return promqltest.NewTestEngine(false, 0, promqltest.DefaultMaxSamplesPerQuery) } @@ -38,13 +39,13 @@ func TestEvaluations(t *testing.T) { func TestConcurrentRangeQueries(t *testing.T) { stor := teststorage.New(t) defer stor.Close() - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 50000000, Timeout: 100 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) const interval = 10000 // 10s interval. 
// A day of data plus 10k steps. diff --git a/promql/value_test.go b/promql/value_test.go index c93ba8213..0017b41e2 100644 --- a/promql/value_test.go +++ b/promql/value_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package promql +package promql_test import ( "testing" @@ -19,39 +19,40 @@ import ( "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" ) func TestVector_ContainsSameLabelset(t *testing.T) { for name, tc := range map[string]struct { - vector Vector + vector promql.Vector expected bool }{ "empty vector": { - vector: Vector{}, + vector: promql.Vector{}, expected: false, }, "vector with one series": { - vector: Vector{ + vector: promql.Vector{ {Metric: labels.FromStrings("lbl", "a")}, }, expected: false, }, "vector with two different series": { - vector: Vector{ + vector: promql.Vector{ {Metric: labels.FromStrings("lbl", "a")}, {Metric: labels.FromStrings("lbl", "b")}, }, expected: false, }, "vector with two equal series": { - vector: Vector{ + vector: promql.Vector{ {Metric: labels.FromStrings("lbl", "a")}, {Metric: labels.FromStrings("lbl", "a")}, }, expected: true, }, "vector with three series, two equal": { - vector: Vector{ + vector: promql.Vector{ {Metric: labels.FromStrings("lbl", "a")}, {Metric: labels.FromStrings("lbl", "b")}, {Metric: labels.FromStrings("lbl", "a")}, @@ -67,35 +68,35 @@ func TestVector_ContainsSameLabelset(t *testing.T) { func TestMatrix_ContainsSameLabelset(t *testing.T) { for name, tc := range map[string]struct { - matrix Matrix + matrix promql.Matrix expected bool }{ "empty matrix": { - matrix: Matrix{}, + matrix: promql.Matrix{}, expected: false, }, "matrix with one series": { - matrix: Matrix{ + matrix: promql.Matrix{ {Metric: labels.FromStrings("lbl", "a")}, }, expected: false, }, "matrix with two different series": { - matrix: Matrix{ + matrix: promql.Matrix{ {Metric: labels.FromStrings("lbl", "a")}, {Metric: labels.FromStrings("lbl", "b")}, }, expected: false, }, "matrix with two equal series": { - matrix: Matrix{ + matrix: promql.Matrix{ {Metric: labels.FromStrings("lbl", "a")}, {Metric: labels.FromStrings("lbl", "a")}, }, expected: true, }, "matrix with three series, two equal": { - matrix: Matrix{ + matrix: promql.Matrix{ {Metric: labels.FromStrings("lbl", "a")}, {Metric: labels.FromStrings("lbl", "b")}, {Metric: labels.FromStrings("lbl", "a")}, From d699dc3c7706944aafa56682ede765398f925ef0 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 8 May 2024 17:57:09 +0200 Subject: [PATCH 071/112] Fix language in docs and comments (#14041) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix language in docs and comments --------- Signed-off-by: Arve Knudsen Co-authored-by: Björn Rabenstein --- model/relabel/relabel.go | 2 +- storage/series.go | 4 ++-- tsdb/blockwriter.go | 2 +- tsdb/chunkenc/varbit.go | 4 ++-- tsdb/chunks/chunks.go | 6 +++--- tsdb/chunks/head_chunks.go | 14 +++++++------- tsdb/compact.go | 10 +++++----- tsdb/db.go | 4 ++-- tsdb/docs/format/head_chunks.md | 8 ++++---- tsdb/docs/format/index.md | 2 +- tsdb/docs/usage.md | 4 ++-- tsdb/exemplar.go | 2 +- tsdb/head_append.go | 4 ++-- tsdb/head_wal.go | 2 +- tsdb/index/index.go | 4 ++-- tsdb/record/record.go | 4 ++-- 16 files changed, 38 insertions(+), 38 deletions(-) diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index d29c3d07a..4ddfa26c7 100644 --- 
a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -48,7 +48,7 @@ const ( Drop Action = "drop" // KeepEqual drops targets for which the input does not match the target. KeepEqual Action = "keepequal" - // Drop drops targets for which the input does match the target. + // DropEqual drops targets for which the input does match the target. DropEqual Action = "dropequal" // HashMod sets a label to the modulus of a hash of labels. HashMod Action = "hashmod" diff --git a/storage/series.go b/storage/series.go index eba11b4d9..70e3d0a19 100644 --- a/storage/series.go +++ b/storage/series.go @@ -55,8 +55,8 @@ func NewListSeries(lset labels.Labels, s []chunks.Sample) *SeriesEntry { } } -// NewListChunkSeriesFromSamples returns chunk series entry that allows to iterate over provided samples. -// NOTE: It uses inefficient chunks encoding implementation, not caring about chunk size. +// NewListChunkSeriesFromSamples returns a chunk series entry that allows to iterate over provided samples. +// NOTE: It uses an inefficient chunks encoding implementation, not caring about chunk size. // Use only for testing. func NewListChunkSeriesFromSamples(lset labels.Labels, samples ...[]chunks.Sample) *ChunkSeriesEntry { chksFromSamples := make([]chunks.Meta, 0, len(samples)) diff --git a/tsdb/blockwriter.go b/tsdb/blockwriter.go index 73bc5f1e3..32346d69d 100644 --- a/tsdb/blockwriter.go +++ b/tsdb/blockwriter.go @@ -42,7 +42,7 @@ type BlockWriter struct { // ErrNoSeriesAppended is returned if the series count is zero while flushing blocks. var ErrNoSeriesAppended = errors.New("no series appended, aborting") -// NewBlockWriter create a new block writer. +// NewBlockWriter creates a new block writer. // // The returned writer accumulates all the series in the Head block until `Flush` is called. // diff --git a/tsdb/chunkenc/varbit.go b/tsdb/chunkenc/varbit.go index b43574dcb..574edec48 100644 --- a/tsdb/chunkenc/varbit.go +++ b/tsdb/chunkenc/varbit.go @@ -61,7 +61,7 @@ func putVarbitInt(b *bstream, val int64) { } } -// readVarbitInt reads an int64 encoced with putVarbitInt. +// readVarbitInt reads an int64 encoded with putVarbitInt. func readVarbitInt(b *bstreamReader) (int64, error) { var d byte for i := 0; i < 8; i++ { @@ -166,7 +166,7 @@ func putVarbitUint(b *bstream, val uint64) { } } -// readVarbitUint reads a uint64 encoced with putVarbitUint. +// readVarbitUint reads a uint64 encoded with putVarbitUint. func readVarbitUint(b *bstreamReader) (uint64, error) { var d byte for i := 0; i < 8; i++ { diff --git a/tsdb/chunks/chunks.go b/tsdb/chunks/chunks.go index 0826f6967..e7df0eeed 100644 --- a/tsdb/chunks/chunks.go +++ b/tsdb/chunks/chunks.go @@ -233,7 +233,7 @@ func ChunkMetasToSamples(chunks []Meta) (result []Sample) { // Iterator iterates over the chunks of a single time series. type Iterator interface { // At returns the current meta. - // It depends on implementation if the chunk is populated or not. + // It depends on the implementation whether the chunk is populated or not. At() Meta // Next advances the iterator by one. Next() bool @@ -478,7 +478,7 @@ func (w *Writer) WriteChunks(chks ...Meta) error { // the batch is too large to fit in the current segment. cutNewBatch := (i != 0) && (batchSize+SegmentHeaderSize > w.segmentSize) - // When the segment already has some data than + // If the segment already has some data then // the first batch size calculation should account for that. 
if firstBatch && w.n > SegmentHeaderSize { cutNewBatch = batchSize+w.n > w.segmentSize @@ -717,7 +717,7 @@ func nextSequenceFile(dir string) (string, int, error) { } // It is not necessary that we find the files in number order, // for example with '1000000' and '200000', '1000000' would come first. - // Though this is a very very race case, we check anyway for the max id. + // Though this is a very very rare case, we check anyway for the max id. if j > i { i = j } diff --git a/tsdb/chunks/head_chunks.go b/tsdb/chunks/head_chunks.go index 087f25fbb..66dbb07b7 100644 --- a/tsdb/chunks/head_chunks.go +++ b/tsdb/chunks/head_chunks.go @@ -188,8 +188,8 @@ func (f *chunkPos) bytesToWriteForChunk(chkLen uint64) uint64 { return bytes } -// ChunkDiskMapper is for writing the Head block chunks to the disk -// and access chunks via mmapped file. +// ChunkDiskMapper is for writing the Head block chunks to disk +// and access chunks via mmapped files. type ChunkDiskMapper struct { /// Writer. dir *os.File @@ -231,7 +231,7 @@ type ChunkDiskMapper struct { closed bool } -// mmappedChunkFile provides mmapp access to an entire head chunks file that holds many chunks. +// mmappedChunkFile provides mmap access to an entire head chunks file that holds many chunks. type mmappedChunkFile struct { byteSlice ByteSlice maxt int64 // Max timestamp among all of this file's chunks. @@ -240,7 +240,7 @@ type mmappedChunkFile struct { // NewChunkDiskMapper returns a new ChunkDiskMapper against the given directory // using the default head chunk file duration. // NOTE: 'IterateAllChunks' method needs to be called at least once after creating ChunkDiskMapper -// to set the maxt of all the file. +// to set the maxt of all files. func NewChunkDiskMapper(reg prometheus.Registerer, dir string, pool chunkenc.Pool, writeBufferSize, writeQueueSize int) (*ChunkDiskMapper, error) { // Validate write buffer size. if writeBufferSize < MinWriteBufferSize || writeBufferSize > MaxWriteBufferSize { @@ -425,7 +425,7 @@ func repairLastChunkFile(files map[int]string) (_ map[int]string, returnErr erro return files, nil } -// WriteChunk writes the chunk to the disk. +// WriteChunk writes the chunk to disk. // The returned chunk ref is the reference from where the chunk encoding starts for the chunk. func (cdm *ChunkDiskMapper) WriteChunk(seriesRef HeadSeriesRef, mint, maxt int64, chk chunkenc.Chunk, isOOO bool, callback func(err error)) (chkRef ChunkDiskMapperRef) { // cdm.evtlPosMtx must be held to serialize the calls to cdm.evtlPos.getNextChunkRef() and the writing of the chunk (either with or without queue). @@ -784,7 +784,7 @@ func (cdm *ChunkDiskMapper) Chunk(ref ChunkDiskMapperRef) (chunkenc.Chunk, error // IterateAllChunks iterates all mmappedChunkFiles (in order of head chunk file name/number) and all the chunks within it // and runs the provided function with information about each chunk. It returns on the first error encountered. // NOTE: This method needs to be called at least once after creating ChunkDiskMapper -// to set the maxt of all the file. +// to set the maxt of all files. 
func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chunkRef ChunkDiskMapperRef, mint, maxt int64, numSamples uint16, encoding chunkenc.Encoding, isOOO bool) error) (err error) { cdm.writePathMtx.Lock() defer cdm.writePathMtx.Unlock() @@ -904,7 +904,7 @@ func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chu return nil } -// Truncate deletes the head chunk files whose file number is less than given fileNo. +// Truncate deletes the head chunk files with numbers less than the given fileNo. func (cdm *ChunkDiskMapper) Truncate(fileNo uint32) error { cdm.readPathMtx.RLock() diff --git a/tsdb/compact.go b/tsdb/compact.go index e09039cf3..c2ae23b2e 100644 --- a/tsdb/compact.go +++ b/tsdb/compact.go @@ -272,7 +272,7 @@ func (c *LeveledCompactor) plan(dms []dirMeta) ([]string, error) { meta := dms[i].meta if meta.MaxTime-meta.MinTime < c.ranges[len(c.ranges)/2] { // If the block is entirely deleted, then we don't care about the block being big enough. - // TODO: This is assuming single tombstone is for distinct series, which might be no true. + // TODO: This is assuming a single tombstone is for a distinct series, which might not be true. if meta.Stats.NumTombstones > 0 && meta.Stats.NumTombstones >= meta.Stats.NumSeries { return []string{dms[i].dir}, nil } @@ -372,7 +372,7 @@ func splitByRange(ds []dirMeta, tr int64) [][]dirMeta { t0 = tr * ((m.MinTime - tr + 1) / tr) } // Skip blocks that don't fall into the range. This can happen via mis-alignment or - // by being the multiple of the intended range. + // by being a multiple of the intended range. if m.MaxTime > t0+tr { i++ continue @@ -395,7 +395,7 @@ func splitByRange(ds []dirMeta, tr int64) [][]dirMeta { return splitDirs } -// CompactBlockMetas merges many block metas into one, combining it's source blocks together +// CompactBlockMetas merges many block metas into one, combining its source blocks together // and adjusting compaction level. Min/Max time of result block meta covers all input blocks. func CompactBlockMetas(uid ulid.ULID, blocks ...*BlockMeta) *BlockMeta { res := &BlockMeta{ @@ -833,7 +833,7 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa chksIter = s.Iterator(chksIter) chks = chks[:0] for chksIter.Next() { - // We are not iterating in streaming way over chunk as + // We are not iterating in a streaming way over chunks as // it's more efficient to do bulk write for index and // chunk file purposes. chks = append(chks, chksIter.At()) @@ -842,7 +842,7 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa return fmt.Errorf("chunk iter: %w", err) } - // Skip the series with all deleted chunks. + // Skip series with all deleted chunks. if len(chks) == 0 { continue } diff --git a/tsdb/db.go b/tsdb/db.go index 22292ab16..c2e8904a2 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -206,7 +206,7 @@ type DB struct { compactor Compactor blocksToDelete BlocksToDeleteFunc - // Mutex for that must be held when modifying the general block layout or lastGarbageCollectedMmapRef. + // mtx must be held when modifying the general block layout or lastGarbageCollectedMmapRef. mtx sync.RWMutex blocks []*Block @@ -1431,7 +1431,7 @@ func (db *DB) reloadBlocks() (err error) { db.metrics.reloads.Inc() }() - // Now that we reload TSDB every minute, there is high chance for race condition with a reload + // Now that we reload TSDB every minute, there is a high chance for a race condition with a reload // triggered by CleanTombstones(). 
We need to lock the reload to avoid the situation where // a normal reload and CleanTombstones try to delete the same block. db.mtx.Lock() diff --git a/tsdb/docs/format/head_chunks.md b/tsdb/docs/format/head_chunks.md index 813d4008c..5737f4205 100644 --- a/tsdb/docs/format/head_chunks.md +++ b/tsdb/docs/format/head_chunks.md @@ -27,10 +27,10 @@ in-file offset (lower 4 bytes) and segment sequence number (upper 4 bytes). # Chunk -Unlike chunks in the on-disk blocks, here we additionally store series -reference that the chunks belongs to and the mint/maxt of the chunks. This is -because we don't have an index associated with these chunks, hence these meta -information are used while replaying the chunks. +Unlike chunks in the on-disk blocks, here we additionally store the series +reference that each chunk belongs to and the mint/maxt of the chunks. This is +because we don't have an index associated with these chunks, hence this metadata +is used while replaying the chunks. ``` ┌─────────────────────┬───────────────────────┬───────────────────────┬───────────────────┬───────────────┬──────────────┬────────────────┐ diff --git a/tsdb/docs/format/index.md b/tsdb/docs/format/index.md index 53b77d9ab..e0ef21bd5 100644 --- a/tsdb/docs/format/index.md +++ b/tsdb/docs/format/index.md @@ -40,7 +40,7 @@ Most of the sections described below start with a `len` field. It always specifi ### Symbol Table -The symbol table holds a sorted list of deduplicated strings that occurred in label pairs of the stored series. They can be referenced from subsequent sections and significantly reduce the total index size. +The symbol table holds a sorted list of deduplicated strings that occur in label pairs of the stored series. They can be referenced from subsequent sections and significantly reduce the total index size. The section contains a sequence of the string entries, each prefixed with the string's length in raw bytes. All strings are utf-8 encoded. Strings are referenced by sequential indexing. The strings are sorted in lexicographically ascending order. diff --git a/tsdb/docs/usage.md b/tsdb/docs/usage.md index e70b24813..7bc1ae6c5 100644 --- a/tsdb/docs/usage.md +++ b/tsdb/docs/usage.md @@ -1,6 +1,6 @@ # Usage -TSDB can be - and is - used by other applications such as [Cortex](https://cortexmetrics.io/) and [Thanos](https://thanos.io/). +TSDB can be - and is - used by other applications such as [Cortex](https://cortexmetrics.io/), [Thanos](https://thanos.io/), and [Grafana Mimir](https://grafana.com/oss/mimir/). This directory contains documentation for any developers who wish to work on or with TSDB. For a full example of instantiating a database, adding and querying data, see the [tsdb example in the docs](https://pkg.go.dev/github.com/prometheus/prometheus/tsdb). @@ -18,7 +18,7 @@ A `DB` has the following main components: * [`Head`](https://pkg.go.dev/github.com/prometheus/prometheus/tsdb#DB.Head) * [Blocks (persistent blocks)](https://pkg.go.dev/github.com/prometheus/prometheus/tsdb#DB.Blocks) -The `Head` is responsible for a lot. Here are its main components: +The `Head` is responsible for a lot. Here are its main components: * [WAL](https://pkg.go.dev/github.com/prometheus/prometheus/tsdb/wal#WAL) (Write Ahead Log). 
* [`stripeSeries`](https://github.com/prometheus/prometheus/blob/411021ada9ab41095923b8d2df9365b632fd40c3/tsdb/head.go#L1292): diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index 3dd784c62..213fff5c2 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -111,7 +111,7 @@ func NewExemplarMetrics(reg prometheus.Registerer) *ExemplarMetrics { return &m } -// NewCircularExemplarStorage creates an circular in memory exemplar storage. +// NewCircularExemplarStorage creates a circular in memory exemplar storage. // If we assume the average case 95 bytes per exemplar we can fit 5651272 exemplars in // 1GB of extra memory, accounting for the fact that this is heap allocated space. // If len <= 0, then the exemplar storage is essentially a noop storage but can later be diff --git a/tsdb/head_append.go b/tsdb/head_append.go index efd573b41..224f65314 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -1467,8 +1467,8 @@ func (s *memSeries) mmapChunks(chunkDiskMapper *chunks.ChunkDiskMapper) (count i return } - // Write chunks starting from the oldest one and stop before we get to current s.headChunk. - // If we have this chain: s.headChunk{t4} -> t3 -> t2 -> t1 -> t0 + // Write chunks starting from the oldest one and stop before we get to current s.headChunks. + // If we have this chain: s.headChunks{t4} -> t3 -> t2 -> t1 -> t0 // then we need to write chunks t0 to t3, but skip s.headChunks. for i := s.headChunks.len() - 1; i > 0; i-- { chk := s.headChunks.atOffset(i) diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index 076768f4e..41f7dd46b 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -1496,7 +1496,7 @@ Outer: } default: - // This is a record type we don't understand. It is either and old format from earlier versions, + // This is a record type we don't understand. It is either an old format from earlier versions, // or a new format and the code was rolled back to old version. loopErr = fmt.Errorf("unsupported snapshot record type 0b%b", rec[0]) break Outer diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 89c2041a7..69e258125 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -158,7 +158,7 @@ type Writer struct { postingsEncoder PostingsEncoder } -// TOC represents index Table Of Content that states where each section of index starts. +// TOC represents the index Table Of Contents that states where each section of the index starts. type TOC struct { Symbols uint64 Series uint64 @@ -168,7 +168,7 @@ type TOC struct { PostingsTable uint64 } -// NewTOCFromByteSlice return parsed TOC from given index byte slice. +// NewTOCFromByteSlice returns a parsed TOC from the given index byte slice. func NewTOCFromByteSlice(bs ByteSlice) (*TOC, error) { if bs.Len() < indexTOCLen { return nil, encoding.ErrInvalidSize diff --git a/tsdb/record/record.go b/tsdb/record/record.go index 8a8409e55..c95b25f06 100644 --- a/tsdb/record/record.go +++ b/tsdb/record/record.go @@ -163,7 +163,7 @@ type RefMetadata struct { Help string } -// RefExemplar is an exemplar with it's labels, timestamp, value the exemplar was collected/observed with, and a reference to a series. +// RefExemplar is an exemplar with the labels, timestamp, value the exemplar was collected/observed with, and a reference to a series. type RefExemplar struct { Ref chunks.HeadSeriesRef T int64 @@ -798,7 +798,7 @@ func (e *Encoder) FloatHistogramSamples(histograms []RefFloatHistogramSample, b return buf.Get() } -// Encode encodes the Float Histogram into a byte slice. 
+// EncodeFloatHistogram encodes the Float Histogram into a byte slice. func EncodeFloatHistogram(buf *encoding.Encbuf, h *histogram.FloatHistogram) { buf.PutByte(byte(h.CounterResetHint)) From 786e0e706cc90cb54f59f0c95704473a963f995f Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 4 May 2024 14:05:11 +0100 Subject: [PATCH 072/112] test: PromQL: stop using internal fields of engine * set enablePerStepStats and lookback duration via `NewTestEngine` parameters. * check maxSamples by recreating query engine * check lookback without modifying internals Signed-off-by: Bryan Boreham --- promql/engine_test.go | 16 +++++----------- promql/promql_test.go | 2 +- promql/promqltest/test.go | 2 +- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index 5ca110824..59e707011 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -1289,13 +1289,10 @@ load 10s }, } - engine := newTestEngine() - engine.enablePerStepStats = true - origMaxSamples := engine.maxSamplesPerQuery for _, c := range cases { t.Run(c.Query, func(t *testing.T) { opts := promql.NewPrometheusQueryOpts(true, 0) - engine.maxSamplesPerQuery = origMaxSamples + engine := promqltest.NewTestEngine(true, 0, promqltest.DefaultMaxSamplesPerQuery) runQuery := func(expErr error) *stats.Statistics { var err error @@ -1322,7 +1319,7 @@ load 10s if c.SkipMaxCheck { return } - engine.maxSamplesPerQuery = stats.Samples.PeakSamples - 1 + engine = promqltest.NewTestEngine(true, 0, stats.Samples.PeakSamples-1) runQuery(promql.ErrTooManySamples(env)) }) } @@ -1496,11 +1493,11 @@ load 10s } // Within limit. - engine.maxSamplesPerQuery = c.MaxSamples + engine = promqltest.NewTestEngine(false, 0, c.MaxSamples) testFunc(nil) // Exceeding limit. - engine.maxSamplesPerQuery = c.MaxSamples - 1 + engine = promqltest.NewTestEngine(false, 0, c.MaxSamples-1) testFunc(promql.ErrTooManySamples(env)) }) } @@ -4956,13 +4953,10 @@ metric 0 1 2 for _, c := range cases { c := c t.Run(c.name, func(t *testing.T) { - engine := newTestEngine() + engine := promqltest.NewTestEngine(false, c.engineLookback, promqltest.DefaultMaxSamplesPerQuery) storage := promqltest.LoadedStorage(t, load) t.Cleanup(func() { storage.Close() }) - if c.engineLookback != 0 { - engine.lookbackDelta = c.engineLookback - } opts := promql.NewPrometheusQueryOpts(false, c.queryLookback) qry, err := engine.NewInstantQuery(context.Background(), storage, opts, query, c.ts) require.NoError(t, err) diff --git a/promql/promql_test.go b/promql/promql_test.go index 59ec4756e..7bafc02e3 100644 --- a/promql/promql_test.go +++ b/promql/promql_test.go @@ -32,7 +32,7 @@ func newTestEngine() *promql.Engine { } func TestEvaluations(t *testing.T) { - RunBuiltinTests(t, newTestEngine()) + promqltest.RunBuiltinTests(t, newTestEngine()) } // Run a lot of queries at the same time, to check for race conditions. 
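The pattern this patch adopts — rebuilding the test engine with the limits under test instead of writing to unexported fields — generalizes. A minimal sketch, assuming the `NewTestEngine(enablePerStepStats bool, lookbackDelta time.Duration, maxSamples int)` signature visible in the `promql/promqltest/test.go` hunk below; the test name, loaded data and query are illustrative stand-ins, not part of the patch:

```go
package promql_test

import (
	"context"
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"github.com/prometheus/prometheus/promql/promqltest"
)

func TestMaxSamplesByRecreatingEngine(t *testing.T) {
	storage := promqltest.LoadedStorage(t, `
load 10s
  metric 1+1x100
`)
	t.Cleanup(func() { storage.Close() })

	// A generous sample budget: the query should succeed.
	engine := promqltest.NewTestEngine(false, 0, promqltest.DefaultMaxSamplesPerQuery)
	qry, err := engine.NewInstantQuery(context.Background(), storage, nil, "metric", time.Unix(100, 0))
	require.NoError(t, err)
	require.NoError(t, qry.Exec(context.Background()).Err)

	// To probe the limit, recreate the engine with a zero budget
	// rather than mutating engine.maxSamplesPerQuery.
	engine = promqltest.NewTestEngine(false, 0, 0)
	qry, err = engine.NewInstantQuery(context.Background(), storage, nil, "metric", time.Unix(100, 0))
	require.NoError(t, err)
	require.Error(t, qry.Exec(context.Background()).Err)
}
```

Recreating the engine keeps each case hermetic and exercises only exported API, which is what lets these tests live in the external `promql_test` package.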
diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 1b2ce78af..1affd91f6 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -76,7 +76,7 @@ func NewTestEngine(enablePerStepStats bool, lookbackDelta time.Duration, maxSamp return promql.NewEngine(promql.EngineOpts{ Logger: nil, Reg: nil, - MaxSamples: DefaultMaxSamplesPerQuery, + MaxSamples: maxSamples, Timeout: 100 * time.Second, NoStepSubqueryIntervalFn: func(int64) int64 { return durationMilliseconds(1 * time.Minute) }, EnableAtModifier: true, From b7b4355807ca53dbff39dc9ed5baa80a8b34fed4 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Thu, 9 May 2024 10:00:24 +0200 Subject: [PATCH 073/112] Use bytes.Buffer from stack buf in Matcher.String() Also removed the exact-size buffer pre-allocation until there's a benchmark to justify it. Signed-off-by: Oleg Zaytsev --- model/labels/matcher.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/model/labels/matcher.go b/model/labels/matcher.go index ce9e42471..8e220e392 100644 --- a/model/labels/matcher.go +++ b/model/labels/matcher.go @@ -14,8 +14,8 @@ package labels import ( + "bytes" "strconv" - "unsafe" ) // MatchType is an enum for label matching types. @@ -79,20 +79,19 @@ func MustNewMatcher(mt MatchType, name, val string) *Matcher { } func (m *Matcher) String() string { - const quote = 1 - const matcher = 2 - // As we're not on go1.22 yet and we don't have the new fancy AvailableBuffer method on strings.Builder, - // we'll use a plain byte slice and then do the unsafe conversion to string just like strings.Builder does. - // We pre-allocate pessimistically for quoting the label name, and optimistically for not having to escape any quotes. - b := make([]byte, 0, quote+len(m.Name)+quote+matcher+quote+len(m.Value)+quote) + // Start a buffer with a pre-allocated size on stack to cover most needs. + var bytea [1024]byte + b := bytes.NewBuffer(bytea[:0]) + if m.shouldQuoteName() { - b = strconv.AppendQuote(b, m.Name) + b.Write(strconv.AppendQuote(b.AvailableBuffer(), m.Name)) } else { - b = append(b, m.Name...) + b.WriteString(m.Name) } - b = append(b, m.Type.String()...) - b = strconv.AppendQuote(b, m.Value) - return *((*string)(unsafe.Pointer(&b))) + b.WriteString(m.Type.String()) + b.Write(strconv.AppendQuote(b.AvailableBuffer(), m.Value)) + + return b.String() } func (m *Matcher) shouldQuoteName() bool { From 5c4310aa37a5587e7a4f4cb1487b4873a92f7d07 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Thu, 9 May 2024 11:55:30 +0200 Subject: [PATCH 074/112] [ENHANCEMENT] TSDB: Optimize querying with regexp matchers Add method `PostingsForLabelMatching` to `tsdb.IndexReader`, to obtain postings for labels with a certain name and values accepted by a provided callback, and use it from `tsdb.PostingsForMatchers`. The intention is to optimize regexp matcher paths, especially not having to load all label values before matching on them. It also adds tests and refactors some `tsdb/index.Reader` methods. Benchmarking shows memory reductions of up to ~100% and speedups of up to ~50%.
Signed-off-by: Arve Knudsen Co-authored-by: Bartlomiej Plotka --- CHANGELOG.md | 1 + tsdb/block.go | 8 ++ tsdb/block_test.go | 81 ++++++++++++++++++++ tsdb/head_read.go | 4 + tsdb/head_read_test.go | 23 ++++++ tsdb/index/index.go | 170 +++++++++++++++++++++++++++-------------- tsdb/index/postings.go | 29 +++++++ tsdb/ooo_head_read.go | 4 + tsdb/querier.go | 19 +---- tsdb/querier_test.go | 14 ++++ 10 files changed, 278 insertions(+), 75 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dfcc5c33..079dd7595 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ * [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980 * [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974 * [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991 +* [ENHANCEMENT] TSDB: Optimize querying with regexp matchers. #13620 * [BUGFIX] OTLP: Don't generate target_info unless at least one identifying label is defined. #13991 * [BUGFIX] OTLP: Don't generate target_info unless there are metrics. #13991 diff --git a/tsdb/block.go b/tsdb/block.go index abd223e4a..83b86a58d 100644 --- a/tsdb/block.go +++ b/tsdb/block.go @@ -77,6 +77,10 @@ type IndexReader interface { // during background garbage collections. Postings(ctx context.Context, name string, values ...string) (index.Postings, error) + // PostingsForLabelMatching returns a sorted iterator over postings having a label with the given name and a value for which match returns true. + // If no postings are found having at least one matching label, an empty iterator is returned. + PostingsForLabelMatching(ctx context.Context, name string, match func(value string) bool) index.Postings + // SortedPostings returns a postings list that is reordered to be sorted // by the label set of the underlying series. 
SortedPostings(index.Postings) index.Postings @@ -518,6 +522,10 @@ func (r blockIndexReader) Postings(ctx context.Context, name string, values ...s return p, nil } +func (r blockIndexReader) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) index.Postings { + return r.ir.PostingsForLabelMatching(ctx, name, match) +} + func (r blockIndexReader) SortedPostings(p index.Postings) index.Postings { return r.ir.SortedPostings(p) } diff --git a/tsdb/block_test.go b/tsdb/block_test.go index 6d15d1838..42acc3c69 100644 --- a/tsdb/block_test.go +++ b/tsdb/block_test.go @@ -36,6 +36,7 @@ import ( "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/chunks" "github.com/prometheus/prometheus/tsdb/fileutil" + "github.com/prometheus/prometheus/tsdb/index" "github.com/prometheus/prometheus/tsdb/wlog" ) @@ -509,6 +510,86 @@ func TestLabelNamesWithMatchers(t *testing.T) { } } +func TestBlockIndexReader_PostingsForLabelMatching(t *testing.T) { + testPostingsForLabelMatching(t, 2, func(t *testing.T, series []labels.Labels) IndexReader { + var seriesEntries []storage.Series + for _, s := range series { + seriesEntries = append(seriesEntries, storage.NewListSeries(s, []chunks.Sample{sample{100, 0, nil, nil}})) + } + + blockDir := createBlock(t, t.TempDir(), seriesEntries) + files, err := sequenceFiles(chunkDir(blockDir)) + require.NoError(t, err) + require.NotEmpty(t, files, "No chunk created.") + + block, err := OpenBlock(nil, blockDir, nil) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, block.Close()) }) + + ir, err := block.Index() + require.NoError(t, err) + return ir + }) +} + +func testPostingsForLabelMatching(t *testing.T, offset storage.SeriesRef, setUp func(*testing.T, []labels.Labels) IndexReader) { + t.Helper() + + ctx := context.Background() + series := []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), + } + ir := setUp(t, series) + t.Cleanup(func() { + require.NoError(t, ir.Close()) + }) + + testCases := []struct { + name string + labelName string + match func(string) bool + exp []storage.SeriesRef + }{ + { + name: "n=1", + labelName: "n", + match: func(val string) bool { + return val == "1" + }, + exp: []storage.SeriesRef{offset + 1, offset + 2, offset + 3}, + }, + { + name: "n=2", + labelName: "n", + match: func(val string) bool { + return val == "2" + }, + exp: []storage.SeriesRef{offset + 4}, + }, + { + name: "missing label", + labelName: "missing", + match: func(val string) bool { + return true + }, + exp: nil, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := ir.PostingsForLabelMatching(ctx, tc.labelName, tc.match) + require.NotNil(t, p) + srs, err := index.ExpandPostings(p) + require.NoError(t, err) + require.Equal(t, tc.exp, srs) + }) + } +} + // createBlock creates a block with given set of series and returns its dir. 
func createBlock(tb testing.TB, dir string, series []storage.Series) string { blockDir, err := CreateBlock(series, dir, 0, log.NewNopLogger()) diff --git a/tsdb/head_read.go b/tsdb/head_read.go index 45bbc81f1..df15abcd5 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -121,6 +121,10 @@ func (h *headIndexReader) Postings(ctx context.Context, name string, values ...s } } +func (h *headIndexReader) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) index.Postings { + return h.head.postings.PostingsForLabelMatching(ctx, name, match) +} + func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { series := make([]*memSeries, 0, 128) diff --git a/tsdb/head_read_test.go b/tsdb/head_read_test.go index de97d70a5..8d835e943 100644 --- a/tsdb/head_read_test.go +++ b/tsdb/head_read_test.go @@ -14,6 +14,7 @@ package tsdb import ( + "context" "fmt" "sync" "testing" @@ -552,3 +553,25 @@ func TestMemSeries_chunk(t *testing.T) { }) } } + +func TestHeadIndexReader_PostingsForLabelMatching(t *testing.T) { + testPostingsForLabelMatching(t, 0, func(t *testing.T, series []labels.Labels) IndexReader { + opts := DefaultHeadOptions() + opts.ChunkRange = 1000 + opts.ChunkDirRoot = t.TempDir() + h, err := NewHead(nil, nil, nil, nil, opts, nil) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, h.Close()) + }) + app := h.Appender(context.Background()) + for _, s := range series { + app.Append(0, s, 0, 0) + } + require.NoError(t, app.Commit()) + + ir, err := h.Index() + require.NoError(t, err) + return ir + }) +} diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 69e258125..a36c33c4f 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -1536,36 +1536,14 @@ func (r *Reader) LabelValues(ctx context.Context, name string, matchers ...*labe if len(e) == 0 { return nil, nil } + values := make([]string, 0, len(e)*symbolFactor) - - d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsTable), nil) - d.Skip(e[0].off) lastVal := e[len(e)-1].value - - skip := 0 - for d.Err() == nil && ctx.Err() == nil { - if skip == 0 { - // These are always the same number of bytes, - // and it's faster to skip than parse. - skip = d.Len() - d.Uvarint() // Keycount. - d.UvarintBytes() // Label name. - skip -= d.Len() - } else { - d.Skip(skip) - } - s := yoloString(d.UvarintBytes()) // Label value. - values = append(values, s) - if s == lastVal { - break - } - d.Uvarint64() // Offset. - } - if d.Err() != nil { - return nil, fmt.Errorf("get postings offset entry: %w", d.Err()) - } - - return values, ctx.Err() + err := r.traversePostingOffsets(ctx, e[0].off, func(val string, _ uint64) (bool, error) { + values = append(values, val) + return val != lastVal, nil + }) + return values, err } // LabelNamesFor returns all the label names for the series referred to by IDs. @@ -1662,6 +1640,44 @@ func (r *Reader) Series(id storage.SeriesRef, builder *labels.ScratchBuilder, ch return nil } +// traversePostingOffsets traverses r's posting offsets table, starting at off, and calls cb with every label value and postings offset. +// If cb returns false (or an error), the traversing is interrupted. +func (r *Reader) traversePostingOffsets(ctx context.Context, off int, cb func(string, uint64) (bool, error)) error { + // Don't Crc32 the entire postings offset table, this is very slow + // so hope any issues were caught at startup. 
+ d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsTable), nil) + d.Skip(off) + skip := 0 + ctxErr := ctx.Err() + for d.Err() == nil && ctxErr == nil { + if skip == 0 { + // These are always the same number of bytes, + // and it's faster to skip than to parse. + skip = d.Len() + d.Uvarint() // Keycount. + d.UvarintBytes() // Label name. + skip -= d.Len() + } else { + d.Skip(skip) + } + v := yoloString(d.UvarintBytes()) // Label value. + postingsOff := d.Uvarint64() // Offset. + if ok, err := cb(v, postingsOff); err != nil { + return err + } else if !ok { + break + } + ctxErr = ctx.Err() + } + if d.Err() != nil { + return fmt.Errorf("get postings offset entry: %w", d.Err()) + } + if ctxErr != nil { + return fmt.Errorf("get postings offset entry: %w", ctxErr) + } + return nil +} + func (r *Reader) Postings(ctx context.Context, name string, values ...string) (Postings, error) { if r.version == FormatV1 { e, ok := r.postingsV1[name] @@ -1696,7 +1712,6 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P slices.Sort(values) // Values must be in order so we can step through the table on disk. res := make([]Postings, 0, len(values)) - skip := 0 valueIndex := 0 for valueIndex < len(values) && values[valueIndex] < e[0].value { // Discard values before the start. @@ -1714,33 +1729,15 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P // Need to look from previous entry. i-- } - // Don't Crc32 the entire postings offset table, this is very slow - // so hope any issues were caught at startup. - d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsTable), nil) - d.Skip(e[i].off) - // Iterate on the offset table. - var postingsOff uint64 // The offset into the postings table. - for d.Err() == nil && ctx.Err() == nil { - if skip == 0 { - // These are always the same number of bytes, - // and it's faster to skip than parse. - skip = d.Len() - d.Uvarint() // Keycount. - d.UvarintBytes() // Label name. - skip -= d.Len() - } else { - d.Skip(skip) - } - v := d.UvarintBytes() // Label value. - postingsOff = d.Uvarint64() // Offset. - for string(v) >= value { - if string(v) == value { + if err := r.traversePostingOffsets(ctx, e[i].off, func(val string, postingsOff uint64) (bool, error) { + for val >= value { + if val == value { // Read from the postings table. d2 := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) _, p, err := r.dec.Postings(d2.Get()) if err != nil { - return nil, fmt.Errorf("decode postings: %w", err) + return false, fmt.Errorf("decode postings: %w", err) } res = append(res, p) } @@ -1752,20 +1749,72 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P } if i+1 == len(e) || value >= e[i+1].value || valueIndex == len(values) { // Need to go to a later postings offset entry, if there is one. 
- break + return false, nil } - } - if d.Err() != nil { - return nil, fmt.Errorf("get postings offset entry: %w", d.Err()) - } - if ctx.Err() != nil { - return nil, fmt.Errorf("get postings offset entry: %w", ctx.Err()) + return true, nil + }); err != nil { + return nil, err } } return Merge(ctx, res...), nil } +func (r *Reader) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) Postings { + if r.version == FormatV1 { + return r.postingsForLabelMatchingV1(ctx, name, match) + } + + e := r.postings[name] + if len(e) == 0 { + return EmptyPostings() + } + + lastVal := e[len(e)-1].value + var its []Postings + if err := r.traversePostingOffsets(ctx, e[0].off, func(val string, postingsOff uint64) (bool, error) { + if match(val) { + // We want this postings iterator since the value is a match + postingsDec := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) + _, p, err := r.dec.PostingsFromDecbuf(postingsDec) + if err != nil { + return false, fmt.Errorf("decode postings: %w", err) + } + its = append(its, p) + } + return val != lastVal, nil + }); err != nil { + return ErrPostings(err) + } + + return Merge(ctx, its...) +} + +func (r *Reader) postingsForLabelMatchingV1(ctx context.Context, name string, match func(string) bool) Postings { + e := r.postingsV1[name] + if len(e) == 0 { + return EmptyPostings() + } + + var its []Postings + for val, offset := range e { + if !match(val) { + continue + } + + // Read from the postings table. + d := encoding.NewDecbufAt(r.b, int(offset), castagnoliTable) + _, p, err := r.dec.PostingsFromDecbuf(d) + if err != nil { + return ErrPostings(fmt.Errorf("decode postings: %w", err)) + } + + its = append(its, p) + } + + return Merge(ctx, its...) +} + // SortedPostings returns the given postings list reordered so that the backing series // are sorted. func (r *Reader) SortedPostings(p Postings) Postings { @@ -1856,6 +1905,11 @@ type Decoder struct { // Postings returns a postings list for b and its number of elements. func (dec *Decoder) Postings(b []byte) (int, Postings, error) { d := encoding.Decbuf{B: b} + return dec.PostingsFromDecbuf(d) +} + +// PostingsFromDecbuf returns a postings list for d and its number of elements. +func (dec *Decoder) PostingsFromDecbuf(d encoding.Decbuf) (int, Postings, error) { n := d.Be32int() l := d.Get() if d.Err() != nil { diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index 61a5560ee..136b3441e 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -397,6 +397,35 @@ func (p *MemPostings) addFor(id storage.SeriesRef, l labels.Label) { } } +func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) Postings { + p.mtx.RLock() + + e := p.m[name] + if len(e) == 0 { + p.mtx.RUnlock() + return EmptyPostings() + } + + // Benchmarking shows that first copying the values into a slice and then matching over that is + // faster than matching over the map keys directly, at least on AMD64. + vals := make([]string, 0, len(e)) + for v, srs := range e { + if len(srs) > 0 { + vals = append(vals, v) + } + } + + var its []Postings + for _, v := range vals { + if match(v) { + its = append(its, NewListPostings(e[v])) + } + } + p.mtx.RUnlock() + + return Merge(ctx, its...) +} + // ExpandPostings returns the postings expanded as a slice. 
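// For example — a hypothetical sketch, where mp (a *MemPostings), ctx and the
// regexp are stand-ins — a matcher can now be evaluated lazily through
// PostingsForLabelMatching above and the merged iterator expanded:
//
//	re := regexp.MustCompile("^(?:api_.*)$")
//	p := mp.PostingsForLabelMatching(ctx, "__name__", re.MatchString)
//	refs, err := ExpandPostings(p)
//
// Only values accepted by the callback have their postings lists merged, so
// the caller no longer needs to list every label value up front.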
func ExpandPostings(p Postings) (res []storage.SeriesRef, err error) { for p.Next() { diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go index ed0b3fd22..af431d678 100644 --- a/tsdb/ooo_head_read.go +++ b/tsdb/ooo_head_read.go @@ -446,6 +446,10 @@ func (ir *OOOCompactionHeadIndexReader) Postings(_ context.Context, name string, return index.NewListPostings(ir.ch.postings), nil } +func (ir *OOOCompactionHeadIndexReader) PostingsForLabelMatching(context.Context, string, func(string) bool) index.Postings { + return index.ErrPostings(errors.New("not supported")) +} + func (ir *OOOCompactionHeadIndexReader) SortedPostings(p index.Postings) index.Postings { // This will already be sorted from the Postings() call above. return p diff --git a/tsdb/querier.go b/tsdb/querier.go index a6763e996..1170493be 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -326,23 +326,8 @@ func postingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Matcher) } } - vals, err := ix.LabelValues(ctx, m.Name) - if err != nil { - return nil, err - } - - res := vals[:0] - for _, val := range vals { - if m.Matches(val) { - res = append(res, val) - } - } - - if len(res) == 0 { - return index.EmptyPostings(), nil - } - - return ix.Postings(ctx, m.Name, res...) + it := ix.PostingsForLabelMatching(ctx, m.Name, m.Matches) + return it, it.Err() } // inversePostingsForMatcher returns the postings for the series with the label name set but not matching the matcher. diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index a293a983d..16de6373d 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -2326,6 +2326,16 @@ func (m mockIndex) SortedPostings(p index.Postings) index.Postings { return index.NewListPostings(ep) } +func (m mockIndex) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) index.Postings { + var res []index.Postings + for l, srs := range m.postings { + if l.Name == name && match(l.Value) { + res = append(res, index.NewListPostings(srs)) + } + } + return index.Merge(ctx, res...) 
+} + func (m mockIndex) ShardedPostings(p index.Postings, shardIndex, shardCount uint64) index.Postings { out := make([]storage.SeriesRef, 0, 128) @@ -3238,6 +3248,10 @@ func (m mockMatcherIndex) LabelNames(context.Context, ...*labels.Matcher) ([]str return []string{}, nil } +func (m mockMatcherIndex) PostingsForLabelMatching(context.Context, string, func(string) bool) index.Postings { + return index.ErrPostings(fmt.Errorf("PostingsForLabelMatching called")) +} + func TestPostingsForMatcher(t *testing.T) { ctx := context.Background() From aabe4d6e4ab39d4d87611668312ec4d93616b5b8 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Mon, 29 Apr 2024 16:16:51 +0200 Subject: [PATCH 075/112] promql.ActiveQueryTracker: Unmap mmapped file when done Signed-off-by: Arve Knudsen --- promql/engine_test.go | 4 +++- promql/query_logger.go | 32 ++++++++++++++++++++++++++++---- promql/query_logger_test.go | 27 ++++++++++++++------------- 3 files changed, 45 insertions(+), 18 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index cc9185546..485239399 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -59,7 +59,9 @@ func TestQueryConcurrency(t *testing.T) { require.NoError(t, err) defer os.RemoveAll(dir) queryTracker := promql.NewActiveQueryTracker(dir, maxConcurrency, nil) - t.Cleanup(queryTracker.Close) + t.Cleanup(func() { + require.NoError(t, queryTracker.Close()) + }) opts := promql.EngineOpts{ Logger: nil, diff --git a/promql/query_logger.go b/promql/query_logger.go index 7ddd8c2d5..76528f958 100644 --- a/promql/query_logger.go +++ b/promql/query_logger.go @@ -16,6 +16,7 @@ package promql import ( "context" "encoding/json" + "fmt" "io" "os" "path/filepath" @@ -36,6 +37,8 @@ type ActiveQueryTracker struct { maxConcurrent int } +var _ io.Closer = &ActiveQueryTracker{} + type Entry struct { Query string `json:"query"` Timestamp int64 `json:"timestamp_sec"` @@ -83,6 +86,23 @@ func logUnfinishedQueries(filename string, filesize int, logger log.Logger) { } } +type mmapedFile struct { + f io.Closer + m mmap.MMap +} + +func (f *mmapedFile) Close() error { + err := f.m.Unmap() + if fErr := f.f.Close(); fErr != nil && err == nil { + return fmt.Errorf("close mmapedFile.f: %w", fErr) + } + + if err != nil { + return fmt.Errorf("mmapedFile: unmapping: %w", err) + } + return nil +} + func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io.Closer, error) { file, err := os.OpenFile(filename, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o666) if err != nil { @@ -108,7 +128,7 @@ func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io return nil, nil, err } - return fileAsBytes, file, err + return fileAsBytes, &mmapedFile{f: file, m: fileAsBytes}, err } func NewActiveQueryTracker(localStoragePath string, maxConcurrent int, logger log.Logger) *ActiveQueryTracker { @@ -204,9 +224,13 @@ func (tracker ActiveQueryTracker) Insert(ctx context.Context, query string) (int } } -func (tracker *ActiveQueryTracker) Close() { +// Close closes tracker. 
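+// Closing a nil tracker is a no-op. Otherwise Close closes tracker.closer,
+// which unmaps the mmapped file and closes the underlying file handle.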
+func (tracker *ActiveQueryTracker) Close() error { if tracker == nil || tracker.closer == nil { - return + return nil } - tracker.closer.Close() + if err := tracker.closer.Close(); err != nil { + return fmt.Errorf("close ActiveQueryTracker.closer: %w", err) + } + return nil } diff --git a/promql/query_logger_test.go b/promql/query_logger_test.go index 376d61b64..7bd93781e 100644 --- a/promql/query_logger_test.go +++ b/promql/query_logger_test.go @@ -16,6 +16,7 @@ package promql import ( "context" "os" + "path/filepath" "testing" "github.com/grafana/regexp" @@ -104,26 +105,26 @@ func TestIndexReuse(t *testing.T) { } func TestMMapFile(t *testing.T) { - file, err := os.CreateTemp("", "mmapedFile") + dir := t.TempDir() + fpath := filepath.Join(dir, "mmapedFile") + const data = "ab" + + fileAsBytes, closer, err := getMMapedFile(fpath, 2, nil) require.NoError(t, err) + copy(fileAsBytes, data) + require.NoError(t, closer.Close()) - filename := file.Name() - defer os.Remove(filename) - - fileAsBytes, _, err := getMMapedFile(filename, 2, nil) - - require.NoError(t, err) - copy(fileAsBytes, "ab") - - f, err := os.Open(filename) + f, err := os.Open(fpath) require.NoError(t, err) + t.Cleanup(func() { + _ = f.Close() + }) bytes := make([]byte, 4) n, err := f.Read(bytes) - require.Equal(t, 2, n) require.NoError(t, err, "Unexpected error while reading file.") - - require.Equal(t, fileAsBytes, bytes[:2], "Mmap failed") + require.Equal(t, 2, n) + require.Equal(t, []byte(data), bytes[:2], "Mmap failed") } func TestParseBrokenJSON(t *testing.T) { From 3b8b57700c469c7cde84e1d8f9d383cb8fe11ab0 Mon Sep 17 00:00:00 2001 From: Anthony Mirabella Date: Fri, 10 May 2024 04:41:21 -0400 Subject: [PATCH 076/112] otlp: Remove OTel feature gate registration from copied translation package (#13932) Signed-off-by: Anthony J Mirabella Signed-off-by: Jesus Vazquez --- go.mod | 2 -- go.sum | 2 -- .../otlptranslator/prometheus/normalize_label.go | 11 +---------- .../otlptranslator/prometheus/normalize_name.go | 10 +--------- 4 files changed, 2 insertions(+), 23 deletions(-) diff --git a/go.mod b/go.mod index 14bdd93a0..7b94f792e 100644 --- a/go.mod +++ b/go.mod @@ -60,7 +60,6 @@ require ( github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c github.com/stretchr/testify v1.9.0 github.com/vultr/govultr/v2 v2.17.2 - go.opentelemetry.io/collector/featuregate v1.5.0 go.opentelemetry.io/collector/pdata v1.5.0 go.opentelemetry.io/collector/semconv v0.98.0 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.50.0 @@ -151,7 +150,6 @@ require ( github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/go-retryablehttp v0.7.4 // indirect github.com/hashicorp/go-rootcerts v1.0.2 // indirect - github.com/hashicorp/go-version v1.6.0 // indirect github.com/hashicorp/golang-lru v0.6.0 // indirect github.com/hashicorp/serf v0.10.1 // indirect github.com/imdario/mergo v0.3.16 // indirect diff --git a/go.sum b/go.sum index 2272c6176..d030a96c4 100644 --- a/go.sum +++ b/go.sum @@ -722,8 +722,6 @@ go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/collector/featuregate v1.5.0 h1:uK8qnYQKz1TMkK+FDTFsywg/EybW/gbnOUaPNUkRznM= -go.opentelemetry.io/collector/featuregate v1.5.0/go.mod h1:w7nUODKxEi3FLf1HslCiE6YWtMtOOrMnSwsDam8Mg9w= 
go.opentelemetry.io/collector/pdata v1.5.0 h1:1fKTmUpr0xCOhP/B0VEvtz7bYPQ45luQ8XFyA07j8LE= go.opentelemetry.io/collector/pdata v1.5.0/go.mod h1:TYj8aKRWZyT/KuKQXKyqSEvK/GV+slFaDMEI+Ke64Yw= go.opentelemetry.io/collector/semconv v0.98.0 h1:zO4L4TmlxXoYu8UgPeYElGY19BW7wPjM+quL5CzoOoY= diff --git a/storage/remote/otlptranslator/prometheus/normalize_label.go b/storage/remote/otlptranslator/prometheus/normalize_label.go index 4f9942bd1..6360aa976 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label.go @@ -19,15 +19,6 @@ package prometheus import ( "strings" "unicode" - - "go.opentelemetry.io/collector/featuregate" -) - -var dropSanitizationGate = featuregate.GlobalRegistry().MustRegister( - "pkg.translator.prometheus.PermissiveLabelSanitization", - featuregate.StageAlpha, - featuregate.WithRegisterDescription("Controls whether to change labels starting with '_' to 'key_'."), - featuregate.WithRegisterReferenceURL("https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/8950"), ) // Normalizes the specified label to follow Prometheus label names standard @@ -50,7 +41,7 @@ func NormalizeLabel(label string) string { // If label starts with a number, prepend with "key_" if unicode.IsDigit(rune(label[0])) { label = "key_" + label - } else if strings.HasPrefix(label, "_") && !strings.HasPrefix(label, "__") && !dropSanitizationGate.IsEnabled() { + } else if strings.HasPrefix(label, "_") && !strings.HasPrefix(label, "__") { label = "key" + label } diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index 6cb4fc199..4cf36671a 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -20,7 +20,6 @@ import ( "strings" "unicode" - "go.opentelemetry.io/collector/featuregate" "go.opentelemetry.io/collector/pdata/pmetric" ) @@ -78,13 +77,6 @@ var perUnitMap = map[string]string{ "y": "year", } -var normalizeNameGate = featuregate.GlobalRegistry().MustRegister( - "pkg.translator.prometheus.NormalizeName", - featuregate.StageBeta, - featuregate.WithRegisterDescription("Controls whether metrics names are automatically normalized to follow Prometheus naming convention"), - featuregate.WithRegisterReferenceURL("https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/8950"), -) - // BuildCompliantName builds a Prometheus-compliant metric name for the specified metric // // Metric name is prefixed with specified namespace and underscore (if any). 
@@ -97,7 +89,7 @@ func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffix
 	var metricName string
 
 	// Full normalization following standard Prometheus naming conventions
-	if addMetricSuffixes && normalizeNameGate.IsEnabled() {
+	if addMetricSuffixes {
 		return normalizeName(metric, namespace)
 	}
 
From fdfc6d472556171c7f95c9ac185803b4d5cabd89 Mon Sep 17 00:00:00 2001
From: Oleg Zaytsev
Date: Mon, 13 May 2024 15:36:55 +0200
Subject: [PATCH 077/112] Benchmark zeroOrOneCharacterStringMatcher.Matches

This adds some more test cases for Unicode values, and also a benchmark
for zeroOrOneCharacterStringMatcher.Matches().

Signed-off-by: Oleg Zaytsev
---
 model/labels/regexp_test.go | 80 +++++++++++++++++++++++++++++++------
 1 file changed, 68 insertions(+), 12 deletions(-)

diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go
index 47d3eeb4a..41160cc05 100644
--- a/model/labels/regexp_test.go
+++ b/model/labels/regexp_test.go
@@ -19,6 +19,7 @@ import (
 	"strings"
 	"testing"
 	"time"
+	"unicode/utf8"
 
 	"github.com/grafana/regexp"
 	"github.com/grafana/regexp/syntax"
@@ -926,19 +927,74 @@ func BenchmarkOptimizeEqualStringMatchers(b *testing.B) {
 }
 
 func TestZeroOrOneCharacterStringMatcher(t *testing.T) {
-	matcher := &zeroOrOneCharacterStringMatcher{matchNL: true}
-	require.True(t, matcher.Matches(""))
-	require.True(t, matcher.Matches("x"))
-	require.True(t, matcher.Matches("\n"))
-	require.False(t, matcher.Matches("xx"))
-	require.False(t, matcher.Matches("\n\n"))
+	t.Run("match newline", func(t *testing.T) {
+		matcher := &zeroOrOneCharacterStringMatcher{matchNL: true}
+		require.True(t, matcher.Matches(""))
+		require.True(t, matcher.Matches("x"))
+		require.True(t, matcher.Matches("\n"))
+		require.False(t, matcher.Matches("xx"))
+		require.False(t, matcher.Matches("\n\n"))
+	})
 
-	matcher = &zeroOrOneCharacterStringMatcher{matchNL: false}
-	require.True(t, matcher.Matches(""))
-	require.True(t, matcher.Matches("x"))
-	require.False(t, matcher.Matches("\n"))
-	require.False(t, matcher.Matches("xx"))
-	require.False(t, matcher.Matches("\n\n"))
+	t.Run("do not match newline", func(t *testing.T) {
+		matcher := &zeroOrOneCharacterStringMatcher{matchNL: false}
+		require.True(t, matcher.Matches(""))
+		require.True(t, matcher.Matches("x"))
+		require.False(t, matcher.Matches("\n"))
+		require.False(t, matcher.Matches("xx"))
+		require.False(t, matcher.Matches("\n\n"))
+	})
+
+	t.Run("unicode", func(t *testing.T) {
+		// Just for documentation purposes, emoji1 is 1 rune, emoji2 is 2 runes.
+		// Keeping this in mind will make it easier for future readers to fix these tests.
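+		// (emoji2 is U+2764 followed by the variation selector U+FE0F, which is
+		// why it decodes to two runes.)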
+ emoji1 := "😀" + emoji2 := "❤️" + require.Equal(t, 1, utf8.RuneCountInString(emoji1)) + require.Equal(t, 2, utf8.RuneCountInString(emoji2)) + + matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} + require.True(t, matcher.Matches(emoji1)) + require.False(t, matcher.Matches(emoji2)) + require.False(t, matcher.Matches(emoji1+emoji1)) + require.False(t, matcher.Matches("x"+emoji1)) + require.False(t, matcher.Matches(emoji1+"x")) + require.False(t, matcher.Matches(emoji1+emoji2)) + }) +} + +func BenchmarkZeroOrOneCharacterStringMatcher(b *testing.B) { + type benchCase struct { + str string + matches bool + } + + emoji1 := "😀" + emoji2 := "❤️" + cases := []benchCase{ + {"", true}, + {"x", true}, + {"\n", true}, + {"xx", false}, + {"\n\n", false}, + {emoji1, true}, + {emoji2, false}, + {emoji1 + emoji1, false}, + {strings.Repeat("x", 100), false}, + {strings.Repeat(emoji1, 100), false}, + {strings.Repeat(emoji2, 100), false}, + } + + matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} + b.ResetTimer() + + for n := 0; n < b.N; n++ { + c := cases[n%len(cases)] + got := matcher.Matches(c.str) + if got != c.matches { + b.Fatalf("unexpected result for %q: got %t, want %t", c.str, got, c.matches) + } + } } func TestLiteralPrefixStringMatcher(t *testing.T) { From bcff5059e6764084cea51342efd138f86eb4da42 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Mon, 13 May 2024 15:41:00 +0200 Subject: [PATCH 078/112] Use utf8.DecodeRuneInString(s) This replaces the custom `moreThanOneRune` function with the standard `utf8.DecodeRuneInString(s)` that can be used to figure out the size of the first rune. Signed-off-by: Oleg Zaytsev --- model/labels/regexp.go | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/model/labels/regexp.go b/model/labels/regexp.go index 79e340984..cc2ab366f 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -828,7 +828,8 @@ type zeroOrOneCharacterStringMatcher struct { } func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool { - if moreThanOneRune(s) { + // If there's more than one rune in the string, then it can't match. + if _, size := utf8.DecodeRuneInString(s); size < len(s) { return false } @@ -840,27 +841,6 @@ func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool { return s[0] != '\n' } -// moreThanOneRune returns true if there are more than one runes in the string. -// It doesn't check whether the string is valid UTF-8. -// The return value should be always equal to utf8.RuneCountInString(s) > 1, -// but the function is optimized for the common case where the string prefix is ASCII. -func moreThanOneRune(s string) bool { - // If len(s) is exactly one or zero, there can't be more than one rune. - // Exit through this path quickly. - if len(s) <= 1 { - return false - } - - // There's one or more bytes: - // If first byte is ASCII then there are multiple runes if there are more bytes after that. - if s[0] < utf8.RuneSelf { - return len(s) > 1 - } - - // Less common case: first is a multibyte rune. - return utf8.RuneCountInString(s) > 1 -} - // trueMatcher is a stringMatcher which matches any string (always returns true). 
type trueMatcher struct{} From dbe88fae22d4e8106580e90f3aa4f328d7cb484c Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Mon, 13 May 2024 17:05:31 +0200 Subject: [PATCH 079/112] Add invalid utf8 test cases to regexp Signed-off-by: Oleg Zaytsev --- model/labels/regexp_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index 41160cc05..e9f12d7b6 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -37,6 +37,7 @@ var ( ".*foo", "^.*foo$", "^.+foo$", + ".?", ".*", ".+", "foo.+", @@ -89,6 +90,12 @@ var ( // Values matching / not matching the test regexps on long alternations. "zQPbMkNO", "zQPbMkNo", "jyyfj00j0061", "jyyfj00j006", "jyyfj00j00612", "NNSPdvMi", "NNSPdvMiXXX", "NNSPdvMixxx", "nnSPdvMi", "nnSPdvMiXXX", + + // Invalid utf8 + "\xfefoo", + "foo\xfe", + "\xfd", + "\xff\xff", } ) From 8b4c9459a2226945e6596489e58dbbe4999e537b Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Mon, 13 May 2024 17:44:07 +0200 Subject: [PATCH 080/112] Check utf8.RuneError result Signed-off-by: Oleg Zaytsev --- model/labels/regexp.go | 6 +++++- model/labels/regexp_test.go | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/model/labels/regexp.go b/model/labels/regexp.go index cc2ab366f..b484e2716 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -829,7 +829,11 @@ type zeroOrOneCharacterStringMatcher struct { func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool { // If there's more than one rune in the string, then it can't match. - if _, size := utf8.DecodeRuneInString(s); size < len(s) { + if r, size := utf8.DecodeRuneInString(s); r == utf8.RuneError { + // Size is 0 for empty strings, 1 for invalid rune. + // Empty string matches, invalid rune matches if there isn't anything else. + return size == len(s) + } else if size < len(s) { return false } diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index e9f12d7b6..1db90a473 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -968,6 +968,23 @@ func TestZeroOrOneCharacterStringMatcher(t *testing.T) { require.False(t, matcher.Matches(emoji1+"x")) require.False(t, matcher.Matches(emoji1+emoji2)) }) + + t.Run("invalid unicode", func(t *testing.T) { + // Just for reference, we also compare to what `^.?$` regular expression matches. 
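+		// utf8.DecodeRuneInString returns (RuneError, 0) for an empty string and
+		// (RuneError, 1) for an invalid byte, so a single invalid byte still
+		// counts as one character and matches, while anything longer does not.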
+ re := regexp.MustCompile("^.?$") + matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} + + requireMatches := func(s string, expected bool) { + t.Helper() + require.Equal(t, expected, matcher.Matches(s)) + require.Equal(t, re.MatchString(s), matcher.Matches(s)) + } + + requireMatches("\xff", true) + requireMatches("x\xff", false) + requireMatches("\xffx", false) + requireMatches("\xff\xfe", false) + }) } func BenchmarkZeroOrOneCharacterStringMatcher(b *testing.B) { From 548bd9d6fbc7baba9dc3306ae495a5ba9266c052 Mon Sep 17 00:00:00 2001 From: Neeraj Gartia Date: Mon, 26 Feb 2024 01:16:26 +0530 Subject: [PATCH 081/112] adds TestNativeHistogramRate func to promql test framework Signed-off-by: Neeraj Gartia --- promql/promqltest/testdata/native_histograms.test | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 1da68a385..281dce786 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -269,3 +269,13 @@ eval instant at 50m histogram_sum(sum(incr_sum_histogram)) eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m]))) {} 30 + +# Test Native histogram rate func +load 5m + test_histogram {{schema:1 count:5 sum:20 buckets:[1 2 1 1]}}+{{schema:1 count:10 sum:5 buckets:[1 2 3 4]}}x10 + +eval instant at 50m rate(test_histogram[5m]) + {} {{schema:1 count:0.03333333333333333 sum:0.016666666666666666 buckets:[0.0033333333333333335 0.006666666666666667 0.01 0.013333333333333334]}} + +eval instant at 50m rate(test_histogram[10m]) + {} {{schema:1 count:0.03333333333333333 sum:0.016666666666666666 buckets:[0.0033333333333333335 0.006666666666666667 0.01 0.013333333333333334]}} From 8b838a05d99080a023093099feb4b694f21b774a Mon Sep 17 00:00:00 2001 From: Neeraj Gartia Date: Fri, 22 Mar 2024 03:17:47 +0530 Subject: [PATCH 082/112] adds test for native histogram rate func in promql testing framework Signed-off-by: Neeraj Gartia --- promql/promqltest/testdata/native_histograms.test | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 281dce786..a5e2af344 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -271,11 +271,8 @@ eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m]))) {} 30 # Test Native histogram rate func -load 5m - test_histogram {{schema:1 count:5 sum:20 buckets:[1 2 1 1]}}+{{schema:1 count:10 sum:5 buckets:[1 2 3 4]}}x10 +load 15s + test_histogram {{schema:1 count:12 sum:18.4 z_bucket:2 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[1 2 0 1 1]}}+{{schema:1 count:9 sum:18.4 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 0 1 1] n_buckets:[1 1 0 1 1]}}x100 -eval instant at 50m rate(test_histogram[5m]) - {} {{schema:1 count:0.03333333333333333 sum:0.016666666666666666 buckets:[0.0033333333333333335 0.006666666666666667 0.01 0.013333333333333334]}} - -eval instant at 50m rate(test_histogram[10m]) - {} {{schema:1 count:0.03333333333333333 sum:0.016666666666666666 buckets:[0.0033333333333333335 0.006666666666666667 0.01 0.013333333333333334]}} +eval instant at 5m rate(test_histogram[45s]) + {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] 
n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}} \ No newline at end of file From adf5a36c1ef3056750013a565b6c44e3c822accf Mon Sep 17 00:00:00 2001 From: Neeraj Gartia Date: Thu, 11 Apr 2024 01:47:09 +0530 Subject: [PATCH 083/112] adds test for sum, count, stddev, stdvar, quantile and fraction func to promql testing framework Signed-off-by: Neeraj Gartia --- .../testdata/native_histograms.test | 456 +++++++++++++++++- 1 file changed, 452 insertions(+), 4 deletions(-) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index a5e2af344..8df633819 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -270,9 +270,457 @@ eval instant at 50m histogram_sum(sum(incr_sum_histogram)) eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m]))) {} 30 -# Test Native histogram rate func +# Apply rate function to histogram. load 15s - test_histogram {{schema:1 count:12 sum:18.4 z_bucket:2 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[1 2 0 1 1]}}+{{schema:1 count:9 sum:18.4 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 0 1 1] n_buckets:[1 1 0 1 1]}}x100 + histogram_rate {{schema:1 count:12 sum:18.4 z_bucket:2 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[1 2 0 1 1]}}+{{schema:1 count:9 sum:18.4 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 0 1 1] n_buckets:[1 1 0 1 1]}}x100 -eval instant at 5m rate(test_histogram[45s]) - {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}} \ No newline at end of file +eval instant at 5m rate(histogram_rate[45s]) + {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}} + +eval range from 5m to 5m30s step 30s rate(histogram_rate[45s]) + {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}}x1 + +# Apply count and sum function to empty histogram. +load 10m + histogram_count_sum_1 {{}}x1 + +eval instant at 10m histogram_count(histogram_count_sum_1) + {} NaN + +eval instant at 10m histogram_sum(histogram_count_sum_1) + {} NaN + +# Apply count and sum function to non-empty histogram. +load 10m + histogram_count_sum_2 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_count(histogram_count_sum_2) + {} 24 + +eval instant at 10m histogram_sum(histogram_count_sum_2) + {} 100 + +# Apply stddev and stdvar function to histogram with {1, 2, 3, 4} (low res). +load 10m + histogram_stddev_stdvar_1 {{schema:2 count:4 sum:10 buckets:[1 0 0 0 1 0 0 1 1]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_1) + {} 1.0787993180043811 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_1) + {} 1.163807968526718 + +# Apply stddev and stdvar function to histogram with {1, 1, 1, 1} (high res). 
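+# The expected values are near zero rather than exactly zero because the
+# observations are estimated from bucket boundaries; at schema 8 the buckets
+# are narrow enough that each observation is treated as roughly 1.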
+load 10m + histogram_stddev_stdvar_2 {{schema:8 count:10 sum:10 buckets:[1 2 3 4]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_2) + {} 0.0048960313898237465 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_2) + {} 2.3971123370139447e-05 + +# Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9}. +load 10m + histogram_stddev_stdvar_3 {{schema:3 count:7 sum:62 z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_3) + {} 42.947236400258 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_3) + {} 1844.4651144196398 + +# Apply stddev and stdvar function to histogram with {-100000, -10000, -1000, -888, -888, -100, -50, -9, -8, -3}. +load 10m + histogram_stddev_stdvar_4 {{schema:0 count:10 sum:-112946 z_bucket:0 n_buckets:[0 0 1 1 1 0 1 1 0 0 3 0 0 0 1 0 0 1]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_4) + {} 27556.344499842 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_4) + {} 759352122.1939945 + +# Apply stddev and stdvar function to histogram with {-10x10}. +load 10m + histogram_stddev_stdvar_5 {{schema:0 count:10 sum:-100 z_bucket:0 n_buckets:[0 0 0 0 10]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_5) + {} 1.3137084989848 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_5) + {} 1.725830020304794 + +# Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9, NaN}. +load 10m + histogram_stddev_stdvar_6 {{schema:3 count:7 sum:NaN z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_6) + {} NaN + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_6) + {} NaN + +# Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9, Inf}. 
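+# The observation at +Inf makes the sum infinite, so mean and variance are
+# undefined and both functions are expected to return NaN.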
+load 10m + histogram_stddev_stdvar_7 {{schema:3 count:7 sum:Inf z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_7) + {} NaN + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_7) + {} NaN + +# Apply quantile function to histogram with all positive buckets with zero bucket +load 10m + histogram_quantile_1 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_quantile(1.001, histogram_quantile_1) + {} Inf + +eval instant at 10m histogram_quantile(1, histogram_quantile_1) + {} 16 + +eval instant at 10m histogram_quantile(0.99, histogram_quantile_1) + {} 15.759999999999998 + +eval instant at 10m histogram_quantile(0.9, histogram_quantile_1) + {} 13.600000000000001 + +eval instant at 10m histogram_quantile(0.6, histogram_quantile_1) + {} 4.799999999999997 + +eval instant at 10m histogram_quantile(0.5, histogram_quantile_1) + {} 1.6666666666666665 + +eval instant at 10m histogram_quantile(0.1, histogram_quantile_1) + {} 0.0006000000000000001 + +eval instant at 10m histogram_quantile(0, histogram_quantile_1) + {} 0 + +eval instant at 10m histogram_quantile(-1, histogram_quantile_1) + {} -Inf + +# Apply quantile function to histogram with all negative buckets with zero bucket +load 10m + histogram_quantile_2 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_quantile(1.001, histogram_quantile_2) + {} Inf + +eval instant at 10m histogram_quantile(1, histogram_quantile_2) + {} 0 + +eval instant at 10m histogram_quantile(0.99, histogram_quantile_2) + {} -6.000000000000048e-05 + +eval instant at 10m histogram_quantile(0.9, histogram_quantile_2) + {} -0.0005999999999999996 + +eval instant at 10m histogram_quantile(0.5, histogram_quantile_2) + {} -1.6666666666666667 + +eval instant at 10m histogram_quantile(0.1, histogram_quantile_2) + {} -13.6 + +eval instant at 10m histogram_quantile(0, histogram_quantile_2) + {} -16 + +eval instant at 10m histogram_quantile(-1, histogram_quantile_2) + {} -Inf + +# Apply quantile function to histogram with both positive and negative buckets with zero bucket +load 10m + histogram_quantile_3 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_quantile(1.001, histogram_quantile_3) + {} Inf + +eval instant at 10m histogram_quantile(1, histogram_quantile_3) + {} 16 + +eval instant at 10m histogram_quantile(0.99, histogram_quantile_3) + {} 15.519999999999996 + +eval instant at 10m histogram_quantile(0.9, histogram_quantile_3) + {} 11.200000000000003 + +eval instant at 10m histogram_quantile(0.7, histogram_quantile_3) + {} 1.2666666666666657 + +eval instant at 10m histogram_quantile(0.55, histogram_quantile_3) + {} 0.0006000000000000005 + +eval instant at 10m histogram_quantile(0.5, histogram_quantile_3) + {} 0 + +eval instant at 10m histogram_quantile(0.45, histogram_quantile_3) + {} -0.0005999999999999996 + +eval instant at 10m histogram_quantile(0.3, histogram_quantile_3) + {} -1.266666666666667 + +eval instant at 10m histogram_quantile(0.1, histogram_quantile_3) + {} -11.2 + +eval instant at 10m histogram_quantile(0.01, histogram_quantile_3) + {} -15.52 + +eval instant at 10m histogram_quantile(0, 
histogram_quantile_3) + {} -16 + +eval instant at 10m histogram_quantile(-1, histogram_quantile_3) + {} -Inf + +# Apply fraction function to empty histogram. +load 10m + histogram_fraction_1 {{}}x1 + +eval instant at 10m histogram_fraction(3.1415, 42, histogram_fraction_1) + {} NaN + +# Apply fraction function to histogram with positive and zero buckets. +load 10m + histogram_fraction_2 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_fraction(0, +Inf, histogram_fraction_2) + {} 1 + +eval instant at 10m histogram_fraction(-Inf, 0, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_2) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(0, 0.0005, histogram_fraction_2) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2) + {} 0.8333333333333334 + +eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(1, 2, histogram_fraction_2) + {} 0.25 + +eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_2) + {} 0.125 + +eval instant at 10m histogram_fraction(1, 8, histogram_fraction_2) + {} 0.3333333333333333 + +eval instant at 10m histogram_fraction(1, 6, histogram_fraction_2) + {} 0.2916666666666667 + +eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_2) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(0, 0, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(0.000001, 0.000001, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(42, 42, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-3.1, -3.1, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(3.1415, NaN, histogram_fraction_2) + {} NaN + +eval instant at 10m histogram_fraction(NaN, 42, histogram_fraction_2) + {} NaN + +eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_2) + {} NaN + +eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_2) + {} 1 + +# Apply fraction function to histogram with negative and zero buckets. 
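+# These cases mirror the all-positive histogram above: fractions over ranges
+# above zero come out as 0, and the interesting ranges flip sign.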
+load 10m + histogram_fraction_3 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_fraction(0, +Inf, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-Inf, 0, histogram_fraction_3) + {} 1 + +eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_3) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-0.0005, 0, histogram_fraction_3) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_3) + {} 0.8333333333333334 + +eval instant at 10m histogram_fraction(1, 2, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(1, 8, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(1, 6, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_3) + {} 0.25 + +eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_3) + {} 0.125 + +eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_3) + {} 0.3333333333333333 + +eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_3) + {} 0.2916666666666667 + +eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_3) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(0, 0, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(0.000001, 0.000001, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(42, 42, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-3.1, -3.1, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(3.1415, NaN, histogram_fraction_3) + {} NaN + +eval instant at 10m histogram_fraction(NaN, 42, histogram_fraction_3) + {} NaN + +eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_3) + {} NaN + +eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_3) + {} 1 + +# Apply fraction function to histogram with both positive, negative and zero buckets. 
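+# The positive and negative buckets are symmetric here, so each one-sided
+# result is half of its counterpart from the single-sided histograms above.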
+load 10m + histogram_fraction_4 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_fraction(0, +Inf, histogram_fraction_4) + {} 0.5 + +eval instant at 10m histogram_fraction(-Inf, 0, histogram_fraction_4) + {} 0.5 + +eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(-0.0005, 0.0005, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_4) + {} 0.4166666666666667 + +eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_4) + {} 0.4166666666666667 + +eval instant at 10m histogram_fraction(1, 2, histogram_fraction_4) + {} 0.125 + +eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_4) + {} 0.0625 + +eval instant at 10m histogram_fraction(1, 8, histogram_fraction_4) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(1, 6, histogram_fraction_4) + {} 0.14583333333333334 + +eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_4) + {} 0.125 + +eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_4) + {} 0.0625 + +eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_4) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_4) + {} 0.14583333333333334 + +eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(0, 0, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(0.000001, 0.000001, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(42, 42, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(-3.1, -3.1, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(3.1415, NaN, histogram_fraction_4) + {} NaN + +eval instant at 10m histogram_fraction(NaN, 42, histogram_fraction_4) + {} NaN + +eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_4) + {} NaN + +eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_4) + {} 1 \ No newline at end of file From 6119124d0ee6c656fa0768500fae63a01da46061 Mon Sep 17 00:00:00 2001 From: Neeraj Gartia Date: Thu, 11 Apr 2024 18:13:49 +0530 Subject: [PATCH 084/112] some nits Signed-off-by: Neeraj Gartia --- .../promqltest/testdata/native_histograms.test | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 8df633819..7fc0403e3 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -280,17 +280,7 @@ eval instant at 5m rate(histogram_rate[45s]) eval range from 5m to 5m30s step 30s rate(histogram_rate[45s]) {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}}x1 -# Apply count and sum function to empty histogram. 
-load 10m - histogram_count_sum_1 {{}}x1 - -eval instant at 10m histogram_count(histogram_count_sum_1) - {} NaN - -eval instant at 10m histogram_sum(histogram_count_sum_1) - {} NaN - -# Apply count and sum function to non-empty histogram. +# Apply count and sum function to histogram. load 10m histogram_count_sum_2 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 @@ -370,7 +360,7 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_7) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_7) {} NaN -# Apply quantile function to histogram with all positive buckets with zero bucket +# Apply quantile function to histogram with all positive buckets with zero bucket. load 10m histogram_quantile_1 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 buckets:[2 3 0 1 4]}}x1 @@ -401,7 +391,7 @@ eval instant at 10m histogram_quantile(0, histogram_quantile_1) eval instant at 10m histogram_quantile(-1, histogram_quantile_1) {} -Inf -# Apply quantile function to histogram with all negative buckets with zero bucket +# Apply quantile function to histogram with all negative buckets with zero bucket. load 10m histogram_quantile_2 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 n_buckets:[2 3 0 1 4]}}x1 @@ -429,7 +419,7 @@ eval instant at 10m histogram_quantile(0, histogram_quantile_2) eval instant at 10m histogram_quantile(-1, histogram_quantile_2) {} -Inf -# Apply quantile function to histogram with both positive and negative buckets with zero bucket +# Apply quantile function to histogram with both positive and negative buckets with zero bucket. load 10m histogram_quantile_3 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 From 661856cb6596eabd0d5545fe16f8f09c1b98360d Mon Sep 17 00:00:00 2001 From: Neeraj Gartia Date: Tue, 7 May 2024 00:23:47 +0530 Subject: [PATCH 085/112] removes the added tests from engine_test.go Signed-off-by: Neeraj Gartia --- promql/engine_test.go | 1072 ----------------- .../testdata/native_histograms.test | 2 +- 2 files changed, 1 insertion(+), 1073 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index cc9185546..45e0054f0 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -34,8 +34,6 @@ import ( "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" - "github.com/prometheus/prometheus/tsdb/tsdbutil" - "github.com/prometheus/prometheus/util/almost" "github.com/prometheus/prometheus/util/annotations" "github.com/prometheus/prometheus/util/stats" "github.com/prometheus/prometheus/util/teststorage" @@ -3209,1076 +3207,6 @@ func TestRangeQuery(t *testing.T) { } } -func TestNativeHistogramRate(t *testing.T) { - // TODO(beorn7): Integrate histograms into the PromQL testing framework - // and write more tests there. 
- engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - app := storage.Appender(context.Background()) - for i, h := range tsdbutil.GenerateTestHistograms(100) { - _, err := app.AppendHistogram(0, lbls, int64(i)*int64(15*time.Second/time.Millisecond), h, nil) - require.NoError(t, err) - } - require.NoError(t, app.Commit()) - - queryString := fmt.Sprintf("rate(%s[45s])", seriesName) - t.Run("instant_query", func(t *testing.T) { - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(int64(5*time.Minute/time.Millisecond))) - require.NoError(t, err) - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - vector, err := res.Vector() - require.NoError(t, err) - require.Len(t, vector, 1) - actualHistogram := vector[0].H - expectedHistogram := &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.2266666666666663, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - } - require.Equal(t, expectedHistogram, actualHistogram) - }) - - t.Run("range_query", func(t *testing.T) { - step := 30 * time.Second - start := timestamp.Time(int64(5 * time.Minute / time.Millisecond)) - end := start.Add(step) - qry, err := engine.NewRangeQuery(context.Background(), storage, nil, queryString, start, end, step) - require.NoError(t, err) - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - matrix, err := res.Matrix() - require.NoError(t, err) - require.Len(t, matrix, 1) - require.Len(t, matrix[0].Histograms, 2) - actualHistograms := matrix[0].Histograms - expectedHistograms := []promql.HPoint{{ - T: 300000, - H: &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.2266666666666663, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - }, - }, { - T: 330000, - H: &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.2266666666666663, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - }, - }} - require.Equal(t, expectedHistograms, actualHistograms) - }) -} - -func TestNativeFloatHistogramRate(t *testing.T) { - // TODO(beorn7): Integrate histograms into the PromQL testing framework - // and write more tests there. 
- engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - app := storage.Appender(context.Background()) - for i, fh := range tsdbutil.GenerateTestFloatHistograms(100) { - _, err := app.AppendHistogram(0, lbls, int64(i)*int64(15*time.Second/time.Millisecond), nil, fh) - require.NoError(t, err) - } - require.NoError(t, app.Commit()) - - queryString := fmt.Sprintf("rate(%s[1m])", seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(int64(5*time.Minute/time.Millisecond))) - require.NoError(t, err) - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - vector, err := res.Vector() - require.NoError(t, err) - require.Len(t, vector, 1) - actualHistogram := vector[0].H - expectedHistogram := &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.226666666666667, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - } - require.Equal(t, expectedHistogram, actualHistogram) -} - -func TestNativeHistogram_HistogramCountAndSum(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - h := &histogram.Histogram{ - Count: 24, - ZeroCount: 4, - ZeroThreshold: 0.001, - Sum: 100, - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - } - for _, floatHisto := range []bool{true, false} { - t.Run(fmt.Sprintf("floatHistogram=%t", floatHisto), func(t *testing.T) { - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - ts := int64(10 * time.Minute / time.Millisecond) - app := storage.Appender(context.Background()) - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, h.ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, h, nil) - } - require.NoError(t, err) - require.NoError(t, app.Commit()) - - queryString := fmt.Sprintf("histogram_count(%s)", seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - if floatHisto { - require.Equal(t, h.ToFloat(nil).Count, vector[0].F) - } else { - require.Equal(t, float64(h.Count), vector[0].F) - } - - queryString = fmt.Sprintf("histogram_sum(%s)", seriesName) - qry, err = engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res = qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err = res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, 
vector[0].H) - if floatHisto { - require.Equal(t, h.ToFloat(nil).Sum, vector[0].F) - } else { - require.Equal(t, h.Sum, vector[0].F) - } - }) - } -} - -func TestNativeHistogram_HistogramStdDevVar(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - testCases := []struct { - name string - h *histogram.Histogram - stdVar float64 - }{ - { - name: "1, 2, 3, 4 low-res", - h: &histogram.Histogram{ - Count: 4, - Sum: 10, - Schema: 2, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 1}, - {Offset: 3, Length: 1}, - {Offset: 2, Length: 2}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - }, - stdVar: 1.163807968526718, // actual variance: 1.25 - }, - { - name: "1, 2, 3, 4 hi-res", - h: &histogram.Histogram{ - Count: 4, - Sum: 10, - Schema: 8, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 1}, - {Offset: 255, Length: 1}, - {Offset: 149, Length: 1}, - {Offset: 105, Length: 1}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - }, - stdVar: 1.2471347737158793, // actual variance: 1.25 - }, - { - name: "-50, -8, 0, 3, 8, 9, 100", - h: &histogram.Histogram{ - Count: 7, - ZeroCount: 1, - Sum: 62, - Schema: 3, - PositiveSpans: []histogram.Span{ - {Offset: 13, Length: 1}, - {Offset: 10, Length: 1}, - {Offset: 1, Length: 1}, - {Offset: 27, Length: 1}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 24, Length: 1}, - {Offset: 21, Length: 1}, - }, - NegativeBuckets: []int64{1, 0}, - }, - stdVar: 1844.4651144196398, // actual variance: 1738.4082 - }, - { - name: "-100000, -10000, -1000, -888, -888, -100, -50, -9, -8, -3", - h: &histogram.Histogram{ - Count: 10, - ZeroCount: 0, - Sum: -112946, - Schema: 0, - NegativeSpans: []histogram.Span{ - {Offset: 2, Length: 3}, - {Offset: 1, Length: 2}, - {Offset: 2, Length: 1}, - {Offset: 3, Length: 1}, - {Offset: 2, Length: 1}, - }, - NegativeBuckets: []int64{1, 0, 0, 0, 0, 2, -2, 0}, - }, - stdVar: 759352122.1939945, // actual variance: 882690990 - }, - { - name: "-10 x10", - h: &histogram.Histogram{ - Count: 10, - ZeroCount: 0, - Sum: -100, - Schema: 0, - NegativeSpans: []histogram.Span{ - {Offset: 4, Length: 1}, - }, - NegativeBuckets: []int64{10}, - }, - stdVar: 1.725830020304794, // actual variance: 0 - }, - { - name: "-50, -8, 0, 3, 8, 9, 100, NaN", - h: &histogram.Histogram{ - Count: 8, - ZeroCount: 1, - Sum: math.NaN(), - Schema: 3, - PositiveSpans: []histogram.Span{ - {Offset: 13, Length: 1}, - {Offset: 10, Length: 1}, - {Offset: 1, Length: 1}, - {Offset: 27, Length: 1}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 24, Length: 1}, - {Offset: 21, Length: 1}, - }, - NegativeBuckets: []int64{1, 0}, - }, - stdVar: math.NaN(), - }, - { - name: "-50, -8, 0, 3, 8, 9, 100, +Inf", - h: &histogram.Histogram{ - Count: 7, - ZeroCount: 1, - Sum: math.Inf(1), - Schema: 3, - PositiveSpans: []histogram.Span{ - {Offset: 13, Length: 1}, - {Offset: 10, Length: 1}, - {Offset: 1, Length: 1}, - {Offset: 27, Length: 1}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 24, Length: 1}, - {Offset: 21, Length: 1}, - }, - NegativeBuckets: []int64{1, 0}, - }, - stdVar: math.NaN(), - }, - } - for _, tc := range testCases { - for _, floatHisto := range []bool{true, false} { - t.Run(fmt.Sprintf("%s floatHistogram=%t", tc.name, floatHisto), func(t *testing.T) { - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := 
"sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - ts := int64(10 * time.Minute / time.Millisecond) - app := storage.Appender(context.Background()) - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, tc.h.ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, tc.h, nil) - } - require.NoError(t, err) - require.NoError(t, app.Commit()) - - queryString := fmt.Sprintf("histogram_stdvar(%s)", seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - require.InEpsilon(t, tc.stdVar, vector[0].F, 1e-12) - - queryString = fmt.Sprintf("histogram_stddev(%s)", seriesName) - qry, err = engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res = qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err = res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - require.InEpsilon(t, math.Sqrt(tc.stdVar), vector[0].F, 1e-12) - }) - } - } -} - -func TestNativeHistogram_HistogramQuantile(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - type subCase struct { - quantile string - value float64 - } - - cases := []struct { - text string - // Histogram to test. - h *histogram.Histogram - // Different quantiles to test for this histogram. - subCases []subCase - }{ - { - text: "all positive buckets with zero bucket", - h: &histogram.Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, - }, - subCases: []subCase{ - { - quantile: "1.0001", - value: math.Inf(1), - }, - { - quantile: "1", - value: 16, - }, - { - quantile: "0.99", - value: 15.759999999999998, - }, - { - quantile: "0.9", - value: 13.600000000000001, - }, - { - quantile: "0.6", - value: 4.799999999999997, - }, - { - quantile: "0.5", - value: 1.6666666666666665, - }, - { // Zero bucket. - quantile: "0.1", - value: 0.0006000000000000001, - }, - { - quantile: "0", - value: 0, - }, - { - quantile: "-1", - value: math.Inf(-1), - }, - }, - }, - { - text: "all negative buckets with zero bucket", - h: &histogram.Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - }, - subCases: []subCase{ - { - quantile: "1.0001", - value: math.Inf(1), - }, - { // Zero bucket. - quantile: "1", - value: 0, - }, - { // Zero bucket. - quantile: "0.99", - value: -6.000000000000048e-05, - }, - { // Zero bucket. - quantile: "0.9", - value: -0.0005999999999999996, - }, - { - quantile: "0.5", - value: -1.6666666666666667, - }, - { - quantile: "0.1", - value: -13.6, - }, - { - quantile: "0", - value: -16, - }, - { - quantile: "-1", - value: math.Inf(-1), - }, - }, - }, - { - text: "both positive and negative buckets with zero bucket", - h: &histogram.Histogram{ - Count: 24, - ZeroCount: 4, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. 
- Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - }, - subCases: []subCase{ - { - quantile: "1.0001", - value: math.Inf(1), - }, - { - quantile: "1", - value: 16, - }, - { - quantile: "0.99", - value: 15.519999999999996, - }, - { - quantile: "0.9", - value: 11.200000000000003, - }, - { - quantile: "0.7", - value: 1.2666666666666657, - }, - { // Zero bucket. - quantile: "0.55", - value: 0.0006000000000000005, - }, - { // Zero bucket. - quantile: "0.5", - value: 0, - }, - { // Zero bucket. - quantile: "0.45", - value: -0.0005999999999999996, - }, - { - quantile: "0.3", - value: -1.266666666666667, - }, - { - quantile: "0.1", - value: -11.2, - }, - { - quantile: "0.01", - value: -15.52, - }, - { - quantile: "0", - value: -16, - }, - { - quantile: "-1", - value: math.Inf(-1), - }, - }, - }, - } - - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - idx := int64(0) - for _, floatHisto := range []bool{true, false} { - for _, c := range cases { - t.Run(fmt.Sprintf("%s floatHistogram=%t", c.text, floatHisto), func(t *testing.T) { - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - ts := idx * int64(10*time.Minute/time.Millisecond) - app := storage.Appender(context.Background()) - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, c.h.ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, c.h, nil) - } - require.NoError(t, err) - require.NoError(t, app.Commit()) - - for j, sc := range c.subCases { - t.Run(fmt.Sprintf("%d %s", j, sc.quantile), func(t *testing.T) { - queryString := fmt.Sprintf("histogram_quantile(%s, %s)", sc.quantile, seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - require.True(t, almost.Equal(sc.value, vector[0].F, defaultEpsilon)) - }) - } - idx++ - }) - } - } -} - -func TestNativeHistogram_HistogramFraction(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - type subCase struct { - lower, upper string - value float64 - } - - invariantCases := []subCase{ - { - lower: "42", - upper: "3.1415", - value: 0, - }, - { - lower: "0", - upper: "0", - value: 0, - }, - { - lower: "0.000001", - upper: "0.000001", - value: 0, - }, - { - lower: "42", - upper: "42", - value: 0, - }, - { - lower: "-3.1", - upper: "-3.1", - value: 0, - }, - { - lower: "3.1415", - upper: "NaN", - value: math.NaN(), - }, - { - lower: "NaN", - upper: "42", - value: math.NaN(), - }, - { - lower: "NaN", - upper: "NaN", - value: math.NaN(), - }, - { - lower: "-Inf", - upper: "+Inf", - value: 1, - }, - } - - cases := []struct { - text string - // Histogram to test. - h *histogram.Histogram - // Different ranges to test for this histogram. 
- subCases []subCase - }{ - { - text: "empty histogram", - h: &histogram.Histogram{}, - subCases: []subCase{ - { - lower: "3.1415", - upper: "42", - value: math.NaN(), - }, - }, - }, - { - text: "all positive buckets with zero bucket", - h: &histogram.Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, // Abs: 2, 3, 1, 4 - }, - subCases: append([]subCase{ - { - lower: "0", - upper: "+Inf", - value: 1, - }, - { - lower: "-Inf", - upper: "0", - value: 0, - }, - { - lower: "-0.001", - upper: "0", - value: 0, - }, - { - lower: "0", - upper: "0.001", - value: 2. / 12., - }, - { - lower: "0", - upper: "0.0005", - value: 1. / 12., - }, - { - lower: "0.001", - upper: "inf", - value: 10. / 12., - }, - { - lower: "-inf", - upper: "-0.001", - value: 0, - }, - { - lower: "1", - upper: "2", - value: 3. / 12., - }, - { - lower: "1.5", - upper: "2", - value: 1.5 / 12., - }, - { - lower: "1", - upper: "8", - value: 4. / 12., - }, - { - lower: "1", - upper: "6", - value: 3.5 / 12., - }, - { - lower: "1.5", - upper: "6", - value: 2. / 12., - }, - { - lower: "-2", - upper: "-1", - value: 0, - }, - { - lower: "-2", - upper: "-1.5", - value: 0, - }, - { - lower: "-8", - upper: "-1", - value: 0, - }, - { - lower: "-6", - upper: "-1", - value: 0, - }, - { - lower: "-6", - upper: "-1.5", - value: 0, - }, - }, invariantCases...), - }, - { - text: "all negative buckets with zero bucket", - h: &histogram.Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - }, - subCases: append([]subCase{ - { - lower: "0", - upper: "+Inf", - value: 0, - }, - { - lower: "-Inf", - upper: "0", - value: 1, - }, - { - lower: "-0.001", - upper: "0", - value: 2. / 12., - }, - { - lower: "0", - upper: "0.001", - value: 0, - }, - { - lower: "-0.0005", - upper: "0", - value: 1. / 12., - }, - { - lower: "0.001", - upper: "inf", - value: 0, - }, - { - lower: "-inf", - upper: "-0.001", - value: 10. / 12., - }, - { - lower: "1", - upper: "2", - value: 0, - }, - { - lower: "1.5", - upper: "2", - value: 0, - }, - { - lower: "1", - upper: "8", - value: 0, - }, - { - lower: "1", - upper: "6", - value: 0, - }, - { - lower: "1.5", - upper: "6", - value: 0, - }, - { - lower: "-2", - upper: "-1", - value: 3. / 12., - }, - { - lower: "-2", - upper: "-1.5", - value: 1.5 / 12., - }, - { - lower: "-8", - upper: "-1", - value: 4. / 12., - }, - { - lower: "-6", - upper: "-1", - value: 3.5 / 12., - }, - { - lower: "-6", - upper: "-1.5", - value: 2. / 12., - }, - }, invariantCases...), - }, - { - text: "both positive and negative buckets with zero bucket", - h: &histogram.Histogram{ - Count: 24, - ZeroCount: 4, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - }, - subCases: append([]subCase{ - { - lower: "0", - upper: "+Inf", - value: 0.5, - }, - { - lower: "-Inf", - upper: "0", - value: 0.5, - }, - { - lower: "-0.001", - upper: "0", - value: 2. / 24, - }, - { - lower: "0", - upper: "0.001", - value: 2. 
/ 24., - }, - { - lower: "-0.0005", - upper: "0.0005", - value: 2. / 24., - }, - { - lower: "0.001", - upper: "inf", - value: 10. / 24., - }, - { - lower: "-inf", - upper: "-0.001", - value: 10. / 24., - }, - { - lower: "1", - upper: "2", - value: 3. / 24., - }, - { - lower: "1.5", - upper: "2", - value: 1.5 / 24., - }, - { - lower: "1", - upper: "8", - value: 4. / 24., - }, - { - lower: "1", - upper: "6", - value: 3.5 / 24., - }, - { - lower: "1.5", - upper: "6", - value: 2. / 24., - }, - { - lower: "-2", - upper: "-1", - value: 3. / 24., - }, - { - lower: "-2", - upper: "-1.5", - value: 1.5 / 24., - }, - { - lower: "-8", - upper: "-1", - value: 4. / 24., - }, - { - lower: "-6", - upper: "-1", - value: 3.5 / 24., - }, - { - lower: "-6", - upper: "-1.5", - value: 2. / 24., - }, - }, invariantCases...), - }, - } - idx := int64(0) - for _, floatHisto := range []bool{true, false} { - for _, c := range cases { - t.Run(fmt.Sprintf("%s floatHistogram=%t", c.text, floatHisto), func(t *testing.T) { - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - ts := idx * int64(10*time.Minute/time.Millisecond) - app := storage.Appender(context.Background()) - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, c.h.ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, c.h, nil) - } - require.NoError(t, err) - require.NoError(t, app.Commit()) - - for j, sc := range c.subCases { - t.Run(fmt.Sprintf("%d %s %s", j, sc.lower, sc.upper), func(t *testing.T) { - queryString := fmt.Sprintf("histogram_fraction(%s, %s, %s)", sc.lower, sc.upper, seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - if math.IsNaN(sc.value) { - require.True(t, math.IsNaN(vector[0].F)) - return - } - require.Equal(t, sc.value, vector[0].F) - }) - } - idx++ - }) - } - } -} - func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { // TODO(codesome): Integrate histograms into the PromQL testing framework // and write more tests there. diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 7fc0403e3..37818e4f8 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -713,4 +713,4 @@ eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_4) {} NaN eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_4) - {} 1 \ No newline at end of file + {} 1 From 10eb23bd6b0d3f4b4207e7af701fef85c1bd5fee Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 14 May 2024 17:45:39 +0100 Subject: [PATCH 086/112] [TEST] Rules: Sleep 15ms to fit Windows behaviour better On Windows, Go will sleep 15ms if you ask for less. TestAsyncRuleEvaluation compares actual delay to the nominal time, so using 15ms should work better on Windows, and be hardly noticeable elsewhere. 
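The 15ms figure matches the default Windows timer granularity (about 15.6ms, i.e. 64 ticks per second): time.Sleep rounds shorter requests up to one timer tick. As a rough illustration of why 10ms was flaky there, here is a standalone sketch (not part of this patch) that makes the rounding visible:

```go
// Hypothetical standalone program to illustrate Windows sleep granularity;
// it is not part of the patched test code.
package main

import (
	"fmt"
	"time"
)

func main() {
	for _, ms := range []int{1, 5, 10, 15, 20} {
		want := time.Duration(ms) * time.Millisecond
		start := time.Now()
		time.Sleep(want)
		// On Windows, requests below one timer tick typically sleep ~15ms;
		// on Linux/macOS the observed duration stays close to the request.
		fmt.Printf("requested %v, slept %v\n", want, time.Since(start))
	}
}
```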
Signed-off-by: Bryan Boreham --- rules/manager_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/manager_test.go b/rules/manager_test.go index 1862b5807..94d0755f9 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -2043,7 +2043,7 @@ func TestBoundedRuleEvalConcurrency(t *testing.T) { require.EqualValues(t, maxInflight.Load(), int32(maxConcurrency)+int32(groupCount)) } -const artificialDelay = 10 * time.Millisecond +const artificialDelay = 15 * time.Millisecond func optsFactory(storage storage.Storage, maxInflight, inflightQueries *atomic.Int32, maxConcurrent int64) *ManagerOptions { var inflightMu sync.Mutex From fdaafdb041dcefddd5dfc9e29edc19951b807fe4 Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Wed, 15 May 2024 06:26:19 +0200 Subject: [PATCH 087/112] tsdb: check for context cancel before regex matching postings (#14096) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * tsdb: check for context cancel before regex matching postings Regex matching can be heavy if the regex takes a lot of cycles to evaluate and we can get stuck evaluating postings for a long time without this fix. The constant checkContextEveryNIterations=100 may be changed later. Signed-off-by: György Krajcsovits --- tsdb/index/index.go | 8 ++++ tsdb/index/index_test.go | 33 ++++++++++++++++ tsdb/index/postings.go | 5 +++ tsdb/index/postings_test.go | 17 +++++++++ tsdb/querier.go | 13 +++++++ tsdb/querier_test.go | 75 +++++++++++++++++++++++++++++++++++++ util/testutil/context.go | 27 ++++++++++++- 7 files changed, 177 insertions(+), 1 deletion(-) diff --git a/tsdb/index/index.go b/tsdb/index/index.go index a36c33c4f..4ded4cbe2 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -51,6 +51,9 @@ const ( indexFilename = "index" seriesByteAlign = 16 + + // checkContextEveryNIterations is used in some tight loops to check if the context is done. + checkContextEveryNIterations = 100 ) type indexWriterSeries struct { @@ -1797,7 +1800,12 @@ func (r *Reader) postingsForLabelMatchingV1(ctx context.Context, name string, ma } var its []Postings + count := 1 for val, offset := range e { + if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + return ErrPostings(ctx.Err()) + } + count++ if !match(val) { continue } diff --git a/tsdb/index/index_test.go b/tsdb/index/index_test.go index c451c38dd..22133d0b7 100644 --- a/tsdb/index/index_test.go +++ b/tsdb/index/index_test.go @@ -719,3 +719,36 @@ func TestChunksTimeOrdering(t *testing.T) { require.NoError(t, idx.Close()) } + +func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { + dir := t.TempDir() + + idx, err := NewWriter(context.Background(), filepath.Join(dir, "index")) + require.NoError(t, err) + + seriesCount := 1000 + for i := 1; i <= seriesCount; i++ { + require.NoError(t, idx.AddSymbol(fmt.Sprintf("%4d", i))) + } + require.NoError(t, idx.AddSymbol("__name__")) + + for i := 1; i <= seriesCount; i++ { + require.NoError(t, idx.AddSeries(storage.SeriesRef(i), labels.FromStrings("__name__", fmt.Sprintf("%4d", i)), + chunks.Meta{Ref: 1, MinTime: 0, MaxTime: 10}, + )) + } + + require.NoError(t, idx.Close()) + + ir, err := NewFileReader(filepath.Join(dir, "index")) + require.NoError(t, err) + defer ir.Close() + + failAfter := uint64(seriesCount / 2) // Fail after processing half of the series. 
+ ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} + p := ir.PostingsForLabelMatching(ctx, "__name__", func(string) bool { + return true + }) + require.Error(t, p.Err()) + require.Equal(t, failAfter, ctx.Count()) +} diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index 136b3441e..c6cf20e0e 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -416,7 +416,12 @@ func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string, } var its []Postings + count := 1 for _, v := range vals { + if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + return ErrPostings(ctx.Err()) + } + count++ if match(v) { its = append(its, NewListPostings(e[v])) } diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index 9e6bd23f8..cabca5977 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -28,6 +28,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/testutil" ) func TestMemPostings_addFor(t *testing.T) { @@ -1282,3 +1283,19 @@ func BenchmarkListPostings(b *testing.B) { }) } } + +func TestMemPostings_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { + memP := NewMemPostings() + seriesCount := 10 * checkContextEveryNIterations + for i := 1; i <= seriesCount; i++ { + memP.Add(storage.SeriesRef(i), labels.FromStrings("__name__", fmt.Sprintf("%4d", i))) + } + + failAfter := uint64(seriesCount / 2 / checkContextEveryNIterations) + ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} + p := memP.PostingsForLabelMatching(ctx, "__name__", func(string) bool { + return true + }) + require.Error(t, p.Err()) + require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result. +} diff --git a/tsdb/querier.go b/tsdb/querier.go index 1170493be..efd4daf26 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -33,6 +33,9 @@ import ( "github.com/prometheus/prometheus/util/annotations" ) +// checkContextEveryNIterations is used in some tight loops to check if the context is done. +const checkContextEveryNIterations = 100 + type blockBaseQuerier struct { blockID ulid.ULID index IndexReader @@ -358,7 +361,12 @@ func inversePostingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Ma if m.Type == labels.MatchEqual && m.Value == "" { res = vals } else { + count := 1 for _, val := range vals { + if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + return nil, ctx.Err() + } + count++ if !m.Matches(val) { res = append(res, val) } @@ -387,7 +395,12 @@ func labelValuesWithMatchers(ctx context.Context, r IndexReader, name string, ma // re-use the allValues slice to avoid allocations // this is safe because the iteration is always ahead of the append filteredValues := allValues[:0] + count := 1 for _, v := range allValues { + if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + return nil, ctx.Err() + } + count++ if m.Matches(v) { filteredValues = append(filteredValues, v) } diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 16de6373d..bb13531d7 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -38,6 +38,7 @@ import ( "github.com/prometheus/prometheus/tsdb/tombstones" "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/annotations" + "github.com/prometheus/prometheus/util/testutil" ) // TODO(bwplotka): Replace those mocks with remote.concreteSeriesSet. 
@@ -3638,3 +3639,77 @@ func TestQueryWithOneChunkCompletelyDeleted(t *testing.T) {
 	require.NoError(t, css.Err())
 	require.Equal(t, 1, seriesCount)
 }
+
+func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) {
+	ir := mockReaderOfLabels{}
+
+	failAfter := uint64(mockReaderOfLabelsSeriesCount / 2 / checkContextEveryNIterations)
+	ctx := &testutil.MockContextErrAfter{FailAfter: failAfter}
+	_, err := labelValuesWithMatchers(ctx, ir, "__name__", labels.MustNewMatcher(labels.MatchRegexp, "__name__", ".+"))
+
+	require.Error(t, err)
+	require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result.
+}
+
+func TestReader_InversePostingsForMatcherHonorsContextCancel(t *testing.T) {
+	ir := mockReaderOfLabels{}
+
+	failAfter := uint64(mockReaderOfLabelsSeriesCount / 2 / checkContextEveryNIterations)
+	ctx := &testutil.MockContextErrAfter{FailAfter: failAfter}
+	_, err := inversePostingsForMatcher(ctx, ir, labels.MustNewMatcher(labels.MatchRegexp, "__name__", ".*"))
+
+	require.Error(t, err)
+	require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result.
+}
+
+type mockReaderOfLabels struct{}
+
+const mockReaderOfLabelsSeriesCount = checkContextEveryNIterations * 10
+
+func (m mockReaderOfLabels) LabelValues(context.Context, string, ...*labels.Matcher) ([]string, error) {
+	return make([]string, mockReaderOfLabelsSeriesCount), nil
+}
+
+func (m mockReaderOfLabels) LabelValueFor(context.Context, storage.SeriesRef, string) (string, error) {
+	panic("LabelValueFor called")
+}
+
+func (m mockReaderOfLabels) SortedLabelValues(context.Context, string, ...*labels.Matcher) ([]string, error) {
+	panic("SortedLabelValues called")
+}
+
+func (m mockReaderOfLabels) Close() error {
+	return nil
+}
+
+func (m mockReaderOfLabels) LabelNames(context.Context, ...*labels.Matcher) ([]string, error) {
+	panic("LabelNames called")
+}
+
+func (m mockReaderOfLabels) LabelNamesFor(context.Context, ...storage.SeriesRef) ([]string, error) {
+	panic("LabelNamesFor called")
+}
+
+func (m mockReaderOfLabels) PostingsForLabelMatching(context.Context, string, func(string) bool) index.Postings {
+	panic("PostingsForLabelMatching called")
+}
+
+func (m mockReaderOfLabels) Postings(context.Context, string, ...string) (index.Postings, error) {
+	panic("Postings called")
+}
+
+func (m mockReaderOfLabels) ShardedPostings(index.Postings, uint64, uint64) index.Postings {
+	panic("ShardedPostings called")
+}
+
+func (m mockReaderOfLabels) SortedPostings(index.Postings) index.Postings {
+	panic("SortedPostings called")
+}
+
+func (m mockReaderOfLabels) Series(storage.SeriesRef, *labels.ScratchBuilder, *[]chunks.Meta) error {
+	panic("Series called")
+}
+
+func (m mockReaderOfLabels) Symbols() index.StringIter {
+	panic("Symbols called")
+}
diff --git a/util/testutil/context.go b/util/testutil/context.go
index 3f63b030d..0c9e0f6f6 100644
--- a/util/testutil/context.go
+++ b/util/testutil/context.go
@@ -13,7 +13,12 @@
 
 package testutil
 
-import "time"
+import (
+	"context"
+	"time"
+
+	"go.uber.org/atomic"
+)
 
 // A MockContext provides a simple stub implementation of a Context.
 type MockContext struct {
@@ -40,3 +45,23 @@ func (c *MockContext) Err() error {
 func (c *MockContext) Value(interface{}) interface{} {
 	return nil
 }
+
+// MockContextErrAfter is a MockContext that will return an error after a certain
+// number of calls to Err().
+type MockContextErrAfter struct { + MockContext + count atomic.Uint64 + FailAfter uint64 +} + +func (c *MockContextErrAfter) Err() error { + c.count.Inc() + if c.count.Load() >= c.FailAfter { + return context.Canceled + } + return c.MockContext.Err() +} + +func (c *MockContextErrAfter) Count() uint64 { + return c.count.Load() +} From b215a41be46cd463d5cfcc3deb7a7ab7028c1903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 15 May 2024 14:02:39 +0200 Subject: [PATCH 088/112] tsdb/index/postings: fix missing lock unlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Followup to #14096 Unfortunately the previous PR introduced this bug by not releasing the lock before returning. Signed-off-by: György Krajcsovits --- tsdb/index/postings.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index c6cf20e0e..159f6416e 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -419,6 +419,7 @@ func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string, count := 1 for _, v := range vals { if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + p.mtx.RUnlock() return ErrPostings(ctx.Err()) } count++ From f10c3454e972e2b0e9ad5deec1e76996ca8aeb4d Mon Sep 17 00:00:00 2001 From: Oleksandr Redko Date: Mon, 13 May 2024 18:36:19 +0300 Subject: [PATCH 089/112] Enable perfsprint linter and fix up code Signed-off-by: Oleksandr Redko --- .golangci.yml | 8 +++++++- cmd/prometheus/main_test.go | 3 ++- cmd/promtool/main_test.go | 3 ++- cmd/promtool/tsdb.go | 6 +++--- cmd/promtool/unittest.go | 2 +- discovery/aws/ec2.go | 3 ++- discovery/aws/lightsail.go | 3 ++- discovery/azure/azure.go | 3 ++- discovery/consul/consul.go | 4 ++-- discovery/digitalocean/digitalocean.go | 2 +- discovery/dns/dns.go | 5 +++-- discovery/hetzner/hcloud.go | 9 ++++----- discovery/hetzner/robot.go | 2 +- discovery/legacymanager/manager_test.go | 2 +- discovery/linode/linode.go | 12 +++++------ discovery/manager_test.go | 2 +- discovery/marathon/marathon.go | 2 +- discovery/moby/network.go | 6 +++--- discovery/moby/nodes.go | 2 +- discovery/moby/services.go | 2 +- discovery/moby/tasks.go | 2 +- discovery/openstack/hypervisor.go | 5 +++-- discovery/openstack/instance.go | 5 +++-- discovery/ovhcloud/dedicated_server.go | 4 ++-- discovery/ovhcloud/vps.go | 13 ++++++------ discovery/puppetdb/puppetdb.go | 2 +- discovery/uyuni/uyuni.go | 3 ++- discovery/zookeeper/zookeeper.go | 10 +++++----- .../examples/custom-sd/adapter-usage/main.go | 4 ++-- model/histogram/float_histogram_test.go | 6 +++--- model/histogram/histogram_test.go | 8 ++++---- model/labels/labels_test.go | 3 ++- model/relabel/relabel_test.go | 4 ++-- notifier/notifier_test.go | 20 +++++++++---------- promql/engine_test.go | 5 +++-- rules/manager_test.go | 3 ++- scrape/scrape_test.go | 2 +- scrape/target_test.go | 3 ++- storage/remote/queue_manager_test.go | 2 +- tsdb/agent/series_test.go | 4 ++-- tsdb/compact_test.go | 5 +++-- tsdb/db_test.go | 2 +- tsdb/exemplar_test.go | 2 +- tsdb/head_test.go | 6 +++--- tsdb/index/postings_test.go | 4 ++-- tsdb/wlog/checkpoint_test.go | 13 ++++++------ web/api/v1/api_test.go | 3 ++- web/federate_test.go | 5 +++-- 48 files changed, 126 insertions(+), 103 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index a85a76cdf..c63184877 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -21,6 +21,7 @@ linters: - goimports - misspell - nolintlint + - perfsprint - predeclared - revive - 
testifylint @@ -44,7 +45,9 @@ issues: - linters: - godot source: "^// ===" - + - linters: + - perfsprint + text: "fmt.Sprintf can be replaced with string addition" linters-settings: depguard: rules: @@ -85,6 +88,9 @@ linters-settings: local-prefixes: github.com/prometheus/prometheus gofumpt: extra-rules: true + perfsprint: + # Optimizes `fmt.Errorf`. + errorf: false revive: # By default, revive will enable only the linting rules that are named in the configuration file. # So, it's needed to explicitly set in configuration all required rules. diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index 03f3a9bc3..89c171bb5 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -24,6 +24,7 @@ import ( "os/exec" "path/filepath" "runtime" + "strconv" "strings" "syscall" "testing" @@ -189,7 +190,7 @@ func TestSendAlerts(t *testing.T) { for i, tc := range testCases { tc := tc - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { senderFunc := senderFunc(func(alerts ...*notifier.Alert) { require.NotEmpty(t, tc.in, "sender called with 0 alert") require.Equal(t, tc.exp, alerts) diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index 7306a3e64..78500fe93 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -25,6 +25,7 @@ import ( "os/exec" "path/filepath" "runtime" + "strconv" "strings" "syscall" "testing" @@ -410,7 +411,7 @@ func TestExitCodes(t *testing.T) { } { t.Run(c.file, func(t *testing.T) { for _, lintFatal := range []bool{true, false} { - t.Run(fmt.Sprintf("%t", lintFatal), func(t *testing.T) { + t.Run(strconv.FormatBool(lintFatal), func(t *testing.T) { args := []string{"-test.main", "check", "config", "testdata/" + c.file} if lintFatal { args = append(args, "--lint-fatal") diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index 2ad969438..6868102fa 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -856,9 +856,9 @@ func displayHistogram(dataType string, datas []int, total int) { } avg := sum / len(datas) fmt.Printf("%s (min/avg/max): %d/%d/%d\n", dataType, datas[0], avg, datas[len(datas)-1]) - maxLeftLen := strconv.Itoa(len(fmt.Sprintf("%d", end))) - maxRightLen := strconv.Itoa(len(fmt.Sprintf("%d", end+step))) - maxCountLen := strconv.Itoa(len(fmt.Sprintf("%d", maxCount))) + maxLeftLen := strconv.Itoa(len(strconv.Itoa(end))) + maxRightLen := strconv.Itoa(len(strconv.Itoa(end + step))) + maxCountLen := strconv.Itoa(len(strconv.Itoa(maxCount))) for bucket, count := range buckets { percentage := 100.0 * count / total fmt.Printf("[%"+maxLeftLen+"d, %"+maxRightLen+"d]: %"+maxCountLen+"d %s\n", bucket*step+start+1, (bucket+1)*step+start, count, strings.Repeat("#", percentage)) diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index 27d5dd98b..5451c5296 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -573,7 +573,7 @@ func (la labelsAndAnnotations) String() string { } s := "[\n0:" + indentLines("\n"+la[0].String(), " ") for i, l := range la[1:] { - s += ",\n" + fmt.Sprintf("%d", i+1) + ":" + indentLines("\n"+l.String(), " ") + s += ",\n" + strconv.Itoa(i+1) + ":" + indentLines("\n"+l.String(), " ") } s += "\n]" diff --git a/discovery/aws/ec2.go b/discovery/aws/ec2.go index aa79fd9c6..a6a0a8257 100644 --- a/discovery/aws/ec2.go +++ b/discovery/aws/ec2.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "net" + "strconv" "strings" "time" @@ -279,7 +280,7 @@ func (d *EC2Discovery) refresh(ctx context.Context) 
([]*targetgroup.Group, error if inst.PrivateDnsName != nil { labels[ec2LabelPrivateDNS] = model.LabelValue(*inst.PrivateDnsName) } - addr := net.JoinHostPort(*inst.PrivateIpAddress, fmt.Sprintf("%d", d.cfg.Port)) + addr := net.JoinHostPort(*inst.PrivateIpAddress, strconv.Itoa(d.cfg.Port)) labels[model.AddressLabel] = model.LabelValue(addr) if inst.Platform != nil { diff --git a/discovery/aws/lightsail.go b/discovery/aws/lightsail.go index 86b138be5..0ad7f2d54 100644 --- a/discovery/aws/lightsail.go +++ b/discovery/aws/lightsail.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "net" + "strconv" "strings" "time" @@ -229,7 +230,7 @@ func (d *LightsailDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, lightsailLabelRegion: model.LabelValue(d.cfg.Region), } - addr := net.JoinHostPort(*inst.PrivateIpAddress, fmt.Sprintf("%d", d.cfg.Port)) + addr := net.JoinHostPort(*inst.PrivateIpAddress, strconv.Itoa(d.cfg.Port)) labels[model.AddressLabel] = model.LabelValue(addr) if inst.PublicIpAddress != nil { diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 7c2ece2c7..70d95b9f3 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -20,6 +20,7 @@ import ( "math/rand" "net" "net/http" + "strconv" "strings" "sync" "time" @@ -492,7 +493,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM } if ip.Properties != nil && ip.Properties.PrivateIPAddress != nil { labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.Properties.PrivateIPAddress) - address := net.JoinHostPort(*ip.Properties.PrivateIPAddress, fmt.Sprintf("%d", d.port)) + address := net.JoinHostPort(*ip.Properties.PrivateIPAddress, strconv.Itoa(d.port)) labels[model.AddressLabel] = model.LabelValue(address) return labels, nil } diff --git a/discovery/consul/consul.go b/discovery/consul/consul.go index 40eed7697..bdc1fc8dc 100644 --- a/discovery/consul/consul.go +++ b/discovery/consul/consul.go @@ -539,9 +539,9 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr // since the service may be registered remotely through a different node. 
var addr string if serviceNode.Service.Address != "" { - addr = net.JoinHostPort(serviceNode.Service.Address, fmt.Sprintf("%d", serviceNode.Service.Port)) + addr = net.JoinHostPort(serviceNode.Service.Address, strconv.Itoa(serviceNode.Service.Port)) } else { - addr = net.JoinHostPort(serviceNode.Node.Address, fmt.Sprintf("%d", serviceNode.Service.Port)) + addr = net.JoinHostPort(serviceNode.Node.Address, strconv.Itoa(serviceNode.Service.Port)) } labels := model.LabelSet{ diff --git a/discovery/digitalocean/digitalocean.go b/discovery/digitalocean/digitalocean.go index 18380b729..ecee60cb1 100644 --- a/discovery/digitalocean/digitalocean.go +++ b/discovery/digitalocean/digitalocean.go @@ -177,7 +177,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { } labels := model.LabelSet{ - doLabelID: model.LabelValue(fmt.Sprintf("%d", droplet.ID)), + doLabelID: model.LabelValue(strconv.Itoa(droplet.ID)), doLabelName: model.LabelValue(droplet.Name), doLabelImage: model.LabelValue(droplet.Image.Slug), doLabelImageName: model.LabelValue(droplet.Image.Name), diff --git a/discovery/dns/dns.go b/discovery/dns/dns.go index cf56a2ad0..314c3d38c 100644 --- a/discovery/dns/dns.go +++ b/discovery/dns/dns.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "net" + "strconv" "strings" "sync" "time" @@ -200,7 +201,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ tg := &targetgroup.Group{} hostPort := func(a string, p int) model.LabelValue { - return model.LabelValue(net.JoinHostPort(a, fmt.Sprintf("%d", p))) + return model.LabelValue(net.JoinHostPort(a, strconv.Itoa(p))) } for _, record := range response.Answer { @@ -209,7 +210,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ switch addr := record.(type) { case *dns.SRV: dnsSrvRecordTarget = model.LabelValue(addr.Target) - dnsSrvRecordPort = model.LabelValue(fmt.Sprintf("%d", addr.Port)) + dnsSrvRecordPort = model.LabelValue(strconv.Itoa(int(addr.Port))) // Remove the final dot from rooted DNS names to make them look more usual. 
addr.Target = strings.TrimRight(addr.Target, ".") diff --git a/discovery/hetzner/hcloud.go b/discovery/hetzner/hcloud.go index 6d0599dfa..df56f94c5 100644 --- a/discovery/hetzner/hcloud.go +++ b/discovery/hetzner/hcloud.go @@ -15,7 +15,6 @@ package hetzner import ( "context" - "fmt" "net" "net/http" "strconv" @@ -92,7 +91,7 @@ func (d *hcloudDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er for i, server := range servers { labels := model.LabelSet{ hetznerLabelRole: model.LabelValue(HetznerRoleHcloud), - hetznerLabelServerID: model.LabelValue(fmt.Sprintf("%d", server.ID)), + hetznerLabelServerID: model.LabelValue(strconv.FormatInt(server.ID, 10)), hetznerLabelServerName: model.LabelValue(server.Name), hetznerLabelDatacenter: model.LabelValue(server.Datacenter.Name), hetznerLabelPublicIPv4: model.LabelValue(server.PublicNet.IPv4.IP.String()), @@ -102,10 +101,10 @@ func (d *hcloudDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er hetznerLabelHcloudDatacenterLocation: model.LabelValue(server.Datacenter.Location.Name), hetznerLabelHcloudDatacenterLocationNetworkZone: model.LabelValue(server.Datacenter.Location.NetworkZone), hetznerLabelHcloudType: model.LabelValue(server.ServerType.Name), - hetznerLabelHcloudCPUCores: model.LabelValue(fmt.Sprintf("%d", server.ServerType.Cores)), + hetznerLabelHcloudCPUCores: model.LabelValue(strconv.Itoa(server.ServerType.Cores)), hetznerLabelHcloudCPUType: model.LabelValue(server.ServerType.CPUType), - hetznerLabelHcloudMemoryGB: model.LabelValue(fmt.Sprintf("%d", int(server.ServerType.Memory))), - hetznerLabelHcloudDiskGB: model.LabelValue(fmt.Sprintf("%d", server.ServerType.Disk)), + hetznerLabelHcloudMemoryGB: model.LabelValue(strconv.Itoa(int(server.ServerType.Memory))), + hetznerLabelHcloudDiskGB: model.LabelValue(strconv.Itoa(server.ServerType.Disk)), model.AddressLabel: model.LabelValue(net.JoinHostPort(server.PublicNet.IPv4.IP.String(), strconv.FormatUint(uint64(d.port), 10))), } diff --git a/discovery/hetzner/robot.go b/discovery/hetzner/robot.go index b862c33f5..516470b05 100644 --- a/discovery/hetzner/robot.go +++ b/discovery/hetzner/robot.go @@ -112,7 +112,7 @@ func (d *robotDiscovery) refresh(context.Context) ([]*targetgroup.Group, error) hetznerLabelPublicIPv4: model.LabelValue(server.Server.ServerIP), hetznerLabelServerStatus: model.LabelValue(server.Server.Status), hetznerLabelRobotProduct: model.LabelValue(server.Server.Product), - hetznerLabelRobotCancelled: model.LabelValue(fmt.Sprintf("%t", server.Server.Canceled)), + hetznerLabelRobotCancelled: model.LabelValue(strconv.FormatBool(server.Server.Canceled)), model.AddressLabel: model.LabelValue(net.JoinHostPort(server.Server.ServerIP, strconv.FormatUint(uint64(d.port), 10))), } diff --git a/discovery/legacymanager/manager_test.go b/discovery/legacymanager/manager_test.go index 1ed699645..a455a8e34 100644 --- a/discovery/legacymanager/manager_test.go +++ b/discovery/legacymanager/manager_test.go @@ -720,7 +720,7 @@ func staticConfig(addrs ...string) discovery.StaticConfig { var cfg discovery.StaticConfig for i, addr := range addrs { cfg = append(cfg, &targetgroup.Group{ - Source: fmt.Sprint(i), + Source: strconv.Itoa(i), Targets: []model.LabelSet{ {model.AddressLabel: model.LabelValue(addr)}, }, diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go index 2a5475b85..f09c1a734 100644 --- a/discovery/linode/linode.go +++ b/discovery/linode/linode.go @@ -325,7 +325,7 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro } 
labels := model.LabelSet{ - linodeLabelID: model.LabelValue(fmt.Sprintf("%d", instance.ID)), + linodeLabelID: model.LabelValue(strconv.Itoa(instance.ID)), linodeLabelName: model.LabelValue(instance.Label), linodeLabelImage: model.LabelValue(instance.Image), linodeLabelPrivateIPv4: model.LabelValue(privateIPv4), @@ -338,13 +338,13 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro linodeLabelType: model.LabelValue(instance.Type), linodeLabelStatus: model.LabelValue(instance.Status), linodeLabelGroup: model.LabelValue(instance.Group), - linodeLabelGPUs: model.LabelValue(fmt.Sprintf("%d", instance.Specs.GPUs)), + linodeLabelGPUs: model.LabelValue(strconv.Itoa(instance.Specs.GPUs)), linodeLabelHypervisor: model.LabelValue(instance.Hypervisor), linodeLabelBackups: model.LabelValue(backupsStatus), - linodeLabelSpecsDiskBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Disk)<<20)), - linodeLabelSpecsMemoryBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Memory)<<20)), - linodeLabelSpecsVCPUs: model.LabelValue(fmt.Sprintf("%d", instance.Specs.VCPUs)), - linodeLabelSpecsTransferBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Transfer)<<20)), + linodeLabelSpecsDiskBytes: model.LabelValue(strconv.FormatInt(int64(instance.Specs.Disk)<<20, 10)), + linodeLabelSpecsMemoryBytes: model.LabelValue(strconv.FormatInt(int64(instance.Specs.Memory)<<20, 10)), + linodeLabelSpecsVCPUs: model.LabelValue(strconv.Itoa(instance.Specs.VCPUs)), + linodeLabelSpecsTransferBytes: model.LabelValue(strconv.FormatInt(int64(instance.Specs.Transfer)<<20, 10)), } addr := net.JoinHostPort(publicIPv4, strconv.FormatUint(uint64(d.port), 10)) diff --git a/discovery/manager_test.go b/discovery/manager_test.go index 656d7c3c6..be07edbdb 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -720,7 +720,7 @@ func staticConfig(addrs ...string) StaticConfig { var cfg StaticConfig for i, addr := range addrs { cfg = append(cfg, &targetgroup.Group{ - Source: fmt.Sprint(i), + Source: strconv.Itoa(i), Targets: []model.LabelSet{ {model.AddressLabel: model.LabelValue(addr)}, }, diff --git a/discovery/marathon/marathon.go b/discovery/marathon/marathon.go index 3e9e15967..38b47accf 100644 --- a/discovery/marathon/marathon.go +++ b/discovery/marathon/marathon.go @@ -505,7 +505,7 @@ func targetEndpoint(task *task, port uint32, containerNet bool) string { host = task.Host } - return net.JoinHostPort(host, fmt.Sprintf("%d", port)) + return net.JoinHostPort(host, strconv.Itoa(int(port))) } // Get a list of ports and a list of labels from a PortMapping. 
diff --git a/discovery/moby/network.go b/discovery/moby/network.go index 0e0d0041d..794d2e607 100644 --- a/discovery/moby/network.go +++ b/discovery/moby/network.go @@ -15,7 +15,7 @@ package moby import ( "context" - "fmt" + "strconv" "github.com/docker/docker/api/types" "github.com/docker/docker/client" @@ -44,8 +44,8 @@ func getNetworksLabels(ctx context.Context, client *client.Client, labelPrefix s labelPrefix + labelNetworkID: network.ID, labelPrefix + labelNetworkName: network.Name, labelPrefix + labelNetworkScope: network.Scope, - labelPrefix + labelNetworkInternal: fmt.Sprintf("%t", network.Internal), - labelPrefix + labelNetworkIngress: fmt.Sprintf("%t", network.Ingress), + labelPrefix + labelNetworkInternal: strconv.FormatBool(network.Internal), + labelPrefix + labelNetworkIngress: strconv.FormatBool(network.Ingress), } for k, v := range network.Labels { ln := strutil.SanitizeLabelName(k) diff --git a/discovery/moby/nodes.go b/discovery/moby/nodes.go index a7c5551c0..b5be844ed 100644 --- a/discovery/moby/nodes.go +++ b/discovery/moby/nodes.go @@ -66,7 +66,7 @@ func (d *Discovery) refreshNodes(ctx context.Context) ([]*targetgroup.Group, err swarmLabelNodeAddress: model.LabelValue(n.Status.Addr), } if n.ManagerStatus != nil { - labels[swarmLabelNodeManagerLeader] = model.LabelValue(fmt.Sprintf("%t", n.ManagerStatus.Leader)) + labels[swarmLabelNodeManagerLeader] = model.LabelValue(strconv.FormatBool(n.ManagerStatus.Leader)) labels[swarmLabelNodeManagerReachability] = model.LabelValue(n.ManagerStatus.Reachability) labels[swarmLabelNodeManagerAddr] = model.LabelValue(n.ManagerStatus.Addr) } diff --git a/discovery/moby/services.go b/discovery/moby/services.go index 1d472b5c0..c61b49925 100644 --- a/discovery/moby/services.go +++ b/discovery/moby/services.go @@ -116,7 +116,7 @@ func (d *Discovery) refreshServices(ctx context.Context) ([]*targetgroup.Group, labels[model.LabelName(k)] = model.LabelValue(v) } - addr := net.JoinHostPort(ip.String(), fmt.Sprintf("%d", d.port)) + addr := net.JoinHostPort(ip.String(), strconv.Itoa(d.port)) labels[model.AddressLabel] = model.LabelValue(addr) tg.Targets = append(tg.Targets, labels) diff --git a/discovery/moby/tasks.go b/discovery/moby/tasks.go index 2505a7b07..38b9d33de 100644 --- a/discovery/moby/tasks.go +++ b/discovery/moby/tasks.go @@ -150,7 +150,7 @@ func (d *Discovery) refreshTasks(ctx context.Context) ([]*targetgroup.Group, err labels[model.LabelName(k)] = model.LabelValue(v) } - addr := net.JoinHostPort(ip.String(), fmt.Sprintf("%d", d.port)) + addr := net.JoinHostPort(ip.String(), strconv.Itoa(d.port)) labels[model.AddressLabel] = model.LabelValue(addr) tg.Targets = append(tg.Targets, labels) diff --git a/discovery/openstack/hypervisor.go b/discovery/openstack/hypervisor.go index 16964cfb6..8964da929 100644 --- a/discovery/openstack/hypervisor.go +++ b/discovery/openstack/hypervisor.go @@ -17,6 +17,7 @@ import ( "context" "fmt" "net" + "strconv" "github.com/go-kit/log" "github.com/gophercloud/gophercloud" @@ -72,7 +73,7 @@ func (h *HypervisorDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group } tg := &targetgroup.Group{ - Source: fmt.Sprintf("OS_" + h.region), + Source: "OS_" + h.region, } // OpenStack API reference // https://developer.openstack.org/api-ref/compute/#list-hypervisors-details @@ -84,7 +85,7 @@ func (h *HypervisorDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group } for _, hypervisor := range hypervisorList { labels := model.LabelSet{} - addr := net.JoinHostPort(hypervisor.HostIP, fmt.Sprintf("%d", 
h.port)) + addr := net.JoinHostPort(hypervisor.HostIP, strconv.Itoa(h.port)) labels[model.AddressLabel] = model.LabelValue(addr) labels[openstackLabelHypervisorID] = model.LabelValue(hypervisor.ID) labels[openstackLabelHypervisorHostName] = model.LabelValue(hypervisor.HypervisorHostname) diff --git a/discovery/openstack/instance.go b/discovery/openstack/instance.go index 9b28c1d6e..750d414a2 100644 --- a/discovery/openstack/instance.go +++ b/discovery/openstack/instance.go @@ -17,6 +17,7 @@ import ( "context" "fmt" "net" + "strconv" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -120,7 +121,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, } pager := servers.List(client, opts) tg := &targetgroup.Group{ - Source: fmt.Sprintf("OS_" + i.region), + Source: "OS_" + i.region, } err = pager.EachPage(func(page pagination.Page) (bool, error) { if ctx.Err() != nil { @@ -194,7 +195,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, if val, ok := floatingIPList[floatingIPKey{id: s.ID, fixed: addr}]; ok { lbls[openstackLabelPublicIP] = model.LabelValue(val) } - addr = net.JoinHostPort(addr, fmt.Sprintf("%d", i.port)) + addr = net.JoinHostPort(addr, strconv.Itoa(i.port)) lbls[model.AddressLabel] = model.LabelValue(addr) tg.Targets = append(tg.Targets, lbls) diff --git a/discovery/ovhcloud/dedicated_server.go b/discovery/ovhcloud/dedicated_server.go index bb5dadcd7..a70857a08 100644 --- a/discovery/ovhcloud/dedicated_server.go +++ b/discovery/ovhcloud/dedicated_server.go @@ -144,12 +144,12 @@ func (d *dedicatedServerDiscovery) refresh(context.Context) ([]*targetgroup.Grou model.InstanceLabel: model.LabelValue(server.Name), dedicatedServerLabelPrefix + "state": model.LabelValue(server.State), dedicatedServerLabelPrefix + "commercial_range": model.LabelValue(server.CommercialRange), - dedicatedServerLabelPrefix + "link_speed": model.LabelValue(fmt.Sprintf("%d", server.LinkSpeed)), + dedicatedServerLabelPrefix + "link_speed": model.LabelValue(strconv.Itoa(server.LinkSpeed)), dedicatedServerLabelPrefix + "rack": model.LabelValue(server.Rack), dedicatedServerLabelPrefix + "no_intervention": model.LabelValue(strconv.FormatBool(server.NoIntervention)), dedicatedServerLabelPrefix + "os": model.LabelValue(server.Os), dedicatedServerLabelPrefix + "support_level": model.LabelValue(server.SupportLevel), - dedicatedServerLabelPrefix + "server_id": model.LabelValue(fmt.Sprintf("%d", server.ServerID)), + dedicatedServerLabelPrefix + "server_id": model.LabelValue(strconv.FormatInt(server.ServerID, 10)), dedicatedServerLabelPrefix + "reverse": model.LabelValue(server.Reverse), dedicatedServerLabelPrefix + "datacenter": model.LabelValue(server.Datacenter), dedicatedServerLabelPrefix + "name": model.LabelValue(server.Name), diff --git a/discovery/ovhcloud/vps.go b/discovery/ovhcloud/vps.go index e2d1dee36..58ceeabd8 100644 --- a/discovery/ovhcloud/vps.go +++ b/discovery/ovhcloud/vps.go @@ -19,6 +19,7 @@ import ( "net/netip" "net/url" "path" + "strconv" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -161,21 +162,21 @@ func (d *vpsDiscovery) refresh(context.Context) ([]*targetgroup.Group, error) { model.InstanceLabel: model.LabelValue(server.Name), vpsLabelPrefix + "offer": model.LabelValue(server.Model.Offer), vpsLabelPrefix + "datacenter": model.LabelValue(fmt.Sprintf("%+v", server.Model.Datacenter)), - vpsLabelPrefix + "model_vcore": model.LabelValue(fmt.Sprintf("%d", server.Model.Vcore)), - vpsLabelPrefix + "maximum_additional_ip": 
model.LabelValue(fmt.Sprintf("%d", server.Model.MaximumAdditionalIP)), + vpsLabelPrefix + "model_vcore": model.LabelValue(strconv.Itoa(server.Model.Vcore)), + vpsLabelPrefix + "maximum_additional_ip": model.LabelValue(strconv.Itoa(server.Model.MaximumAdditionalIP)), vpsLabelPrefix + "version": model.LabelValue(server.Model.Version), vpsLabelPrefix + "model_name": model.LabelValue(server.Model.Name), - vpsLabelPrefix + "disk": model.LabelValue(fmt.Sprintf("%d", server.Model.Disk)), - vpsLabelPrefix + "memory": model.LabelValue(fmt.Sprintf("%d", server.Model.Memory)), + vpsLabelPrefix + "disk": model.LabelValue(strconv.Itoa(server.Model.Disk)), + vpsLabelPrefix + "memory": model.LabelValue(strconv.Itoa(server.Model.Memory)), vpsLabelPrefix + "zone": model.LabelValue(server.Zone), vpsLabelPrefix + "display_name": model.LabelValue(server.DisplayName), vpsLabelPrefix + "cluster": model.LabelValue(server.Cluster), vpsLabelPrefix + "state": model.LabelValue(server.State), vpsLabelPrefix + "name": model.LabelValue(server.Name), vpsLabelPrefix + "netboot_mode": model.LabelValue(server.NetbootMode), - vpsLabelPrefix + "memory_limit": model.LabelValue(fmt.Sprintf("%d", server.MemoryLimit)), + vpsLabelPrefix + "memory_limit": model.LabelValue(strconv.Itoa(server.MemoryLimit)), vpsLabelPrefix + "offer_type": model.LabelValue(server.OfferType), - vpsLabelPrefix + "vcore": model.LabelValue(fmt.Sprintf("%d", server.Vcore)), + vpsLabelPrefix + "vcore": model.LabelValue(strconv.Itoa(server.Vcore)), vpsLabelPrefix + "ipv4": model.LabelValue(ipv4), vpsLabelPrefix + "ipv6": model.LabelValue(ipv6), } diff --git a/discovery/puppetdb/puppetdb.go b/discovery/puppetdb/puppetdb.go index 8c9ccde0a..8f89acbf9 100644 --- a/discovery/puppetdb/puppetdb.go +++ b/discovery/puppetdb/puppetdb.go @@ -237,7 +237,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { pdbLabelResource: model.LabelValue(resource.Resource), pdbLabelType: model.LabelValue(resource.Type), pdbLabelTitle: model.LabelValue(resource.Title), - pdbLabelExported: model.LabelValue(fmt.Sprintf("%t", resource.Exported)), + pdbLabelExported: model.LabelValue(strconv.FormatBool(resource.Exported)), pdbLabelFile: model.LabelValue(resource.File), pdbLabelEnvironment: model.LabelValue(resource.Environment), } diff --git a/discovery/uyuni/uyuni.go b/discovery/uyuni/uyuni.go index e885ef2e8..c8af2f158 100644 --- a/discovery/uyuni/uyuni.go +++ b/discovery/uyuni/uyuni.go @@ -20,6 +20,7 @@ import ( "net/http" "net/url" "path" + "strconv" "strings" "time" @@ -269,7 +270,7 @@ func (d *Discovery) getEndpointLabels( model.AddressLabel: model.LabelValue(addr), uyuniLabelMinionHostname: model.LabelValue(networkInfo.Hostname), uyuniLabelPrimaryFQDN: model.LabelValue(networkInfo.PrimaryFQDN), - uyuniLablelSystemID: model.LabelValue(fmt.Sprintf("%d", endpoint.SystemID)), + uyuniLablelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)), uyuniLablelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)), uyuniLablelEndpointName: model.LabelValue(endpoint.EndpointName), uyuniLablelExporter: model.LabelValue(endpoint.ExporterName), diff --git a/discovery/zookeeper/zookeeper.go b/discovery/zookeeper/zookeeper.go index 303c7ca6d..92904dd71 100644 --- a/discovery/zookeeper/zookeeper.go +++ b/discovery/zookeeper/zookeeper.go @@ -280,17 +280,17 @@ func parseServersetMember(data []byte, path string) (model.LabelSet, error) { labels := model.LabelSet{} labels[serversetPathLabel] = model.LabelValue(path) labels[model.AddressLabel] = 
model.LabelValue( - net.JoinHostPort(member.ServiceEndpoint.Host, fmt.Sprintf("%d", member.ServiceEndpoint.Port))) + net.JoinHostPort(member.ServiceEndpoint.Host, strconv.Itoa(member.ServiceEndpoint.Port))) labels[serversetEndpointLabelPrefix+"_host"] = model.LabelValue(member.ServiceEndpoint.Host) - labels[serversetEndpointLabelPrefix+"_port"] = model.LabelValue(fmt.Sprintf("%d", member.ServiceEndpoint.Port)) + labels[serversetEndpointLabelPrefix+"_port"] = model.LabelValue(strconv.Itoa(member.ServiceEndpoint.Port)) for name, endpoint := range member.AdditionalEndpoints { cleanName := model.LabelName(strutil.SanitizeLabelName(name)) labels[serversetEndpointLabelPrefix+"_host_"+cleanName] = model.LabelValue( endpoint.Host) labels[serversetEndpointLabelPrefix+"_port_"+cleanName] = model.LabelValue( - fmt.Sprintf("%d", endpoint.Port)) + strconv.Itoa(endpoint.Port)) } labels[serversetStatusLabel] = model.LabelValue(member.Status) @@ -321,10 +321,10 @@ func parseNerveMember(data []byte, path string) (model.LabelSet, error) { labels := model.LabelSet{} labels[nervePathLabel] = model.LabelValue(path) labels[model.AddressLabel] = model.LabelValue( - net.JoinHostPort(member.Host, fmt.Sprintf("%d", member.Port))) + net.JoinHostPort(member.Host, strconv.Itoa(member.Port))) labels[nerveEndpointLabelPrefix+"_host"] = model.LabelValue(member.Host) - labels[nerveEndpointLabelPrefix+"_port"] = model.LabelValue(fmt.Sprintf("%d", member.Port)) + labels[nerveEndpointLabelPrefix+"_port"] = model.LabelValue(strconv.Itoa(member.Port)) labels[nerveEndpointLabelPrefix+"_name"] = model.LabelValue(member.Name) return labels, nil diff --git a/documentation/examples/custom-sd/adapter-usage/main.go b/documentation/examples/custom-sd/adapter-usage/main.go index bfbca7b70..8ccbafe6f 100644 --- a/documentation/examples/custom-sd/adapter-usage/main.go +++ b/documentation/examples/custom-sd/adapter-usage/main.go @@ -127,9 +127,9 @@ func (d *discovery) parseServiceNodes(resp *http.Response, name string) (*target // since the service may be registered remotely through a different node. 
var addr string if node.ServiceAddress != "" { - addr = net.JoinHostPort(node.ServiceAddress, fmt.Sprintf("%d", node.ServicePort)) + addr = net.JoinHostPort(node.ServiceAddress, strconv.Itoa(node.ServicePort)) } else { - addr = net.JoinHostPort(node.Address, fmt.Sprintf("%d", node.ServicePort)) + addr = net.JoinHostPort(node.Address, strconv.Itoa(node.ServicePort)) } target := model.LabelSet{model.AddressLabel: model.LabelValue(addr)} diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index 49fb77ab0..759da6540 100644 --- a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -14,9 +14,9 @@ package histogram import ( - "fmt" "math" "math/rand" + "strconv" "testing" "github.com/stretchr/testify/require" @@ -2134,7 +2134,7 @@ func TestAllFloatBucketIterator(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { var expBuckets, actBuckets []Bucket[float64] if c.includeNeg { @@ -2360,7 +2360,7 @@ func TestAllReverseFloatBucketIterator(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { var expBuckets, actBuckets []Bucket[float64] if c.includePos { diff --git a/model/histogram/histogram_test.go b/model/histogram/histogram_test.go index 14a948e64..d1a074135 100644 --- a/model/histogram/histogram_test.go +++ b/model/histogram/histogram_test.go @@ -14,8 +14,8 @@ package histogram import ( - "fmt" "math" + "strconv" "testing" "github.com/stretchr/testify/require" @@ -72,7 +72,7 @@ func TestHistogramString(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { actualString := c.histogram.String() require.Equal(t, c.expectedString, actualString) }) @@ -211,7 +211,7 @@ func TestCumulativeBucketIterator(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { it := c.histogram.CumulativeBucketIterator() actualBuckets := make([]Bucket[uint64], 0, len(c.expectedBuckets)) for it.Next() { @@ -371,7 +371,7 @@ func TestRegularBucketIterator(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { it := c.histogram.PositiveBucketIterator() actualPositiveBuckets := make([]Bucket[uint64], 0, len(c.expectedPositiveBuckets)) for it.Next() { diff --git a/model/labels/labels_test.go b/model/labels/labels_test.go index 3d6e7659f..6464d007d 100644 --- a/model/labels/labels_test.go +++ b/model/labels/labels_test.go @@ -17,6 +17,7 @@ import ( "encoding/json" "fmt" "net/http" + "strconv" "strings" "testing" @@ -732,7 +733,7 @@ func TestScratchBuilder(t *testing.T) { want: FromStrings("ddd", "444"), }, } { - t.Run(fmt.Sprint(i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { b := NewScratchBuilder(len(tcase.add)) for _, lbl := range tcase.add { b.Add(lbl.Name, lbl.Value) diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index 6798fb02a..507ea101d 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -14,7 +14,7 @@ package relabel import ( - "fmt" + "strconv" "testing" "github.com/prometheus/common/model" @@ -657,7 +657,7 @@ func TestRelabelValidate(t *testing.T) { }, } for i, test := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { + 
t.Run(strconv.Itoa(i), func(t *testing.T) { err := test.config.Validate() if test.expected == "" { require.NoError(t, err) diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index e7a9243bc..d2e72ca33 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -74,7 +74,7 @@ func TestHandlerNextBatch(t *testing.T) { for i := range make([]struct{}, 2*maxBatchSize+1) { h.queue = append(h.queue, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } @@ -186,10 +186,10 @@ func TestHandlerSendAll(t *testing.T) { for i := range make([]struct{}, maxBatchSize) { h.queue = append(h.queue, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) expected = append(expected, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } @@ -297,23 +297,23 @@ func TestHandlerSendAllRemapPerAm(t *testing.T) { for i := range make([]struct{}, maxBatchSize/2) { h.queue = append(h.queue, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }, &Alert{ - Labels: labels.FromStrings("alertname", "test", "alertnamedrop", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", "test", "alertnamedrop", strconv.Itoa(i)), }, ) expected1 = append(expected1, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }, &Alert{ - Labels: labels.FromStrings("alertname", "test", "alertnamedrop", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", "test", "alertnamedrop", strconv.Itoa(i)), }, ) expected2 = append(expected2, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } @@ -502,7 +502,7 @@ func TestHandlerQueuing(t *testing.T) { var alerts []*Alert for i := range make([]struct{}, 20*maxBatchSize) { alerts = append(alerts, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } @@ -762,7 +762,7 @@ func TestHangingNotifier(t *testing.T) { var alerts []*Alert for i := range make([]struct{}, 20*maxBatchSize) { alerts = append(alerts, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } diff --git a/promql/engine_test.go b/promql/engine_test.go index cc9185546..220eaca9d 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -20,6 +20,7 @@ import ( "math" "os" "sort" + "strconv" "testing" "time" @@ -4406,7 +4407,7 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { _, err := app.Append(0, labels.FromStrings("__name__", "float_series", "idx", "0"), ts, 42) require.NoError(t, err) for idx1, h := range c.histograms { - lbls := labels.FromStrings("__name__", seriesName, "idx", fmt.Sprintf("%d", idx1)) + lbls := labels.FromStrings("__name__", seriesName, "idx", strconv.Itoa(idx1)) // Since we mutate h later, we need to create a copy here. 
var err error if floatHisto { @@ -4678,7 +4679,7 @@ func TestNativeHistogram_SubOperator(t *testing.T) { ts := idx0 * int64(10*time.Minute/time.Millisecond) app := storage.Appender(context.Background()) for idx1, h := range c.histograms { - lbls := labels.FromStrings("__name__", seriesName, "idx", fmt.Sprintf("%d", idx1)) + lbls := labels.FromStrings("__name__", seriesName, "idx", strconv.Itoa(idx1)) // Since we mutate h later, we need to create a copy here. var err error if floatHisto { diff --git a/rules/manager_test.go b/rules/manager_test.go index 1862b5807..aeb327660 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -19,6 +19,7 @@ import ( "math" "os" "sort" + "strconv" "sync" "testing" "time" @@ -1361,7 +1362,7 @@ func TestNativeHistogramsInRecordingRules(t *testing.T) { ts := time.Now() app := db.Appender(context.Background()) for i, h := range hists { - l := labels.FromStrings("__name__", "histogram_metric", "idx", fmt.Sprintf("%d", i)) + l := labels.FromStrings("__name__", "histogram_metric", "idx", strconv.Itoa(i)) _, err := app.AppendHistogram(0, l, ts.UnixMilli(), h.Copy(), nil) require.NoError(t, err) } diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 51bd377e4..b5a31cb65 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -1285,7 +1285,7 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { for i := 0; i < 500; i++ { s = fmt.Sprintf("%smetric_%d_%d 42\n", s, i, numScrapes) } - w.Write([]byte(fmt.Sprintf(s + "&"))) + w.Write([]byte(s + "&")) } else { cancel() } diff --git a/scrape/target_test.go b/scrape/target_test.go index f91e31050..b43ff2406 100644 --- a/scrape/target_test.go +++ b/scrape/target_test.go @@ -21,6 +21,7 @@ import ( "net/http/httptest" "net/url" "os" + "strconv" "strings" "testing" "time" @@ -67,7 +68,7 @@ func TestTargetOffset(t *testing.T) { // Calculate offsets for 10000 different targets. 
for i := range offsets { target := newTestTarget("example.com:80", 0, labels.FromStrings( - "label", fmt.Sprintf("%d", i), + "label", strconv.Itoa(i), )) offsets[i] = target.offset(interval, offsetSeed) } diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index e32a3ace0..6121fb6c0 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -468,7 +468,7 @@ func TestReleaseNoninternedString(t *testing.T) { m.StoreSeries([]record.RefSeries{ { Ref: chunks.HeadSeriesRef(i), - Labels: labels.FromStrings("asdf", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("asdf", strconv.Itoa(i)), }, }, 0) m.SeriesReset(1) diff --git a/tsdb/agent/series_test.go b/tsdb/agent/series_test.go index ae327d858..bc5a4af5d 100644 --- a/tsdb/agent/series_test.go +++ b/tsdb/agent/series_test.go @@ -14,8 +14,8 @@ package agent import ( - "fmt" "math" + "strconv" "sync" "testing" "time" @@ -53,7 +53,7 @@ func TestNoDeadlock(t *testing.T) { series := &memSeries{ ref: chunks.HeadSeriesRef(i), lset: labels.FromMap(map[string]string{ - "id": fmt.Sprintf("%d", i), + "id": strconv.Itoa(i), }), } stripeSeries.Set(series.lset.Hash(), series) diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index b2d2ea6e7..10c90e30d 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -22,6 +22,7 @@ import ( "os" "path" "path/filepath" + "strconv" "sync" "testing" "time" @@ -1129,7 +1130,7 @@ func BenchmarkCompactionFromHead(b *testing.B) { for ln := 0; ln < labelNames; ln++ { app := h.Appender(context.Background()) for lv := 0; lv < labelValues; lv++ { - app.Append(0, labels.FromStrings(fmt.Sprintf("%d", ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)), 0, 0) + app.Append(0, labels.FromStrings(strconv.Itoa(ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)), 0, 0) } require.NoError(b, app.Commit()) } @@ -1161,7 +1162,7 @@ func BenchmarkCompactionFromOOOHead(b *testing.B) { for ln := 0; ln < labelNames; ln++ { app := h.Appender(context.Background()) for lv := 0; lv < labelValues; lv++ { - lbls := labels.FromStrings(fmt.Sprintf("%d", ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)) + lbls := labels.FromStrings(strconv.Itoa(ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)) _, err = app.Append(0, lbls, int64(totalSamples), 0) require.NoError(b, err) for ts := 0; ts < totalSamples; ts++ { diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 71b2f05ac..a682f4655 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -1065,7 +1065,7 @@ func TestWALSegmentSizeOptions(t *testing.T) { for i := int64(0); i < 155; i++ { app := db.Appender(context.Background()) - ref, err := app.Append(0, labels.FromStrings("wal"+fmt.Sprintf("%d", i), "size"), i, rand.Float64()) + ref, err := app.Append(0, labels.FromStrings("wal"+strconv.Itoa(int(i)), "size"), i, rand.Float64()) require.NoError(t, err) for j := int64(1); j <= 78; j++ { _, err := app.Append(ref, labels.EmptyLabels(), i+j, rand.Float64()) diff --git a/tsdb/exemplar_test.go b/tsdb/exemplar_test.go index 21030e457..b2be22170 100644 --- a/tsdb/exemplar_test.go +++ b/tsdb/exemplar_test.go @@ -416,7 +416,7 @@ func BenchmarkAddExemplar(b *testing.B) { exLabels := labels.FromStrings("trace_id", "89620921") for _, n := range []int{10000, 100000, 1000000} { - b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { + b.Run(strconv.Itoa(n), func(b *testing.B) { for j := 0; j < b.N; j++ { b.StopTimer() exs, err := NewCircularExemplarStorage(int64(n), eMetrics) diff --git a/tsdb/head_test.go 
b/tsdb/head_test.go index d9631b3b9..804886ad7 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -3383,7 +3383,7 @@ func TestWaitForPendingReadersInTimeRange(t *testing.T) { func TestAppendHistogram(t *testing.T) { l := labels.FromStrings("a", "b") for _, numHistograms := range []int{1, 10, 150, 200, 250, 300} { - t.Run(fmt.Sprintf("%d", numHistograms), func(t *testing.T) { + t.Run(strconv.Itoa(numHistograms), func(t *testing.T) { head, _ := newTestHead(t, 1000, wlog.CompressionNone, false) t.Cleanup(func() { require.NoError(t, head.Close()) @@ -3692,7 +3692,7 @@ func TestChunkSnapshot(t *testing.T) { e := ex{ seriesLabels: lbls, e: exemplar.Exemplar{ - Labels: labels.FromStrings("trace_id", fmt.Sprintf("%d", rand.Int())), + Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())), Value: rand.Float64(), Ts: ts, }, @@ -5032,7 +5032,7 @@ func TestOOOAppendWithNoSeries(t *testing.T) { require.Equal(t, expSamples, ms.headChunks.chunk.NumSamples()) } - newLabels := func(idx int) labels.Labels { return labels.FromStrings("foo", fmt.Sprintf("%d", idx)) } + newLabels := func(idx int) labels.Labels { return labels.FromStrings("foo", strconv.Itoa(idx)) } s1 := newLabels(1) appendSample(s1, 300) // At 300m. diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index cabca5977..7fa0a892b 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -50,7 +50,7 @@ func TestMemPostings_ensureOrder(t *testing.T) { for j := range l { l[j] = storage.SeriesRef(rand.Uint64()) } - v := fmt.Sprintf("%d", i) + v := strconv.Itoa(i) p.m["a"][v] = l } @@ -391,7 +391,7 @@ func BenchmarkMerge(t *testing.B) { its := make([]Postings, len(refs)) for _, nSeries := range []int{1, 10, 100, 1000, 10000, 100000} { - t.Run(fmt.Sprint(nSeries), func(bench *testing.B) { + t.Run(strconv.Itoa(nSeries), func(bench *testing.B) { ctx := context.Background() for i := 0; i < bench.N; i++ { // Reset the ListPostings to their original values each time round the loop. diff --git a/tsdb/wlog/checkpoint_test.go b/tsdb/wlog/checkpoint_test.go index 279f7c435..ea11c5dd7 100644 --- a/tsdb/wlog/checkpoint_test.go +++ b/tsdb/wlog/checkpoint_test.go @@ -19,6 +19,7 @@ import ( "os" "path/filepath" "sort" + "strconv" "strings" "testing" @@ -232,10 +233,10 @@ func TestCheckpoint(t *testing.T) { // Write changing metadata for each series. In the end, only the latest // version should end up in the checkpoint. 
b = enc.Metadata([]record.RefMetadata{ - {Ref: 0, Unit: fmt.Sprintf("%d", last), Help: fmt.Sprintf("%d", last)}, - {Ref: 1, Unit: fmt.Sprintf("%d", last), Help: fmt.Sprintf("%d", last)}, - {Ref: 2, Unit: fmt.Sprintf("%d", last), Help: fmt.Sprintf("%d", last)}, - {Ref: 3, Unit: fmt.Sprintf("%d", last), Help: fmt.Sprintf("%d", last)}, + {Ref: 0, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, + {Ref: 1, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, + {Ref: 2, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, + {Ref: 3, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, }, nil) require.NoError(t, w.Log(b)) @@ -324,8 +325,8 @@ func TestCheckpoint(t *testing.T) { testutil.RequireEqual(t, expectedRefSeries, series) expectedRefMetadata := []record.RefMetadata{ - {Ref: 0, Unit: fmt.Sprintf("%d", last-100), Help: fmt.Sprintf("%d", last-100)}, - {Ref: 2, Unit: fmt.Sprintf("%d", last-100), Help: fmt.Sprintf("%d", last-100)}, + {Ref: 0, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)}, + {Ref: 2, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)}, {Ref: 4, Unit: "unit", Help: "help"}, } sort.Slice(metadata, func(i, j int) bool { return metadata[i].Ref < metadata[j].Ref }) diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 9d7d1d502..7d55dd11a 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -25,6 +25,7 @@ import ( "reflect" "runtime" "sort" + "strconv" "strings" "testing" "time" @@ -3544,7 +3545,7 @@ func TestTSDBStatus(t *testing.T) { }, } { tc := tc - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { api := &API{db: tc.db, gatherer: prometheus.DefaultGatherer} endpoint := tc.endpoint(api) req, err := http.NewRequest(tc.method, fmt.Sprintf("?%s", tc.values.Encode()), nil) diff --git a/web/federate_test.go b/web/federate_test.go index e5adb97eb..056a95d67 100644 --- a/web/federate_test.go +++ b/web/federate_test.go @@ -22,6 +22,7 @@ import ( "net/http" "net/http/httptest" "sort" + "strconv" "strings" "testing" "time" @@ -341,8 +342,8 @@ func TestFederationWithNativeHistograms(t *testing.T) { } app := db.Appender(context.Background()) for i := 0; i < 6; i++ { - l := labels.FromStrings("__name__", "test_metric", "foo", fmt.Sprintf("%d", i)) - expL := labels.FromStrings("__name__", "test_metric", "instance", "", "foo", fmt.Sprintf("%d", i)) + l := labels.FromStrings("__name__", "test_metric", "foo", strconv.Itoa(i)) + expL := labels.FromStrings("__name__", "test_metric", "instance", "", "foo", strconv.Itoa(i)) var err error switch i { case 0, 3: From 0f01d4b336cb4829475fbee6595041a25d85e594 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 15 May 2024 21:58:56 +0200 Subject: [PATCH 090/112] Fix flaky test Signed-off-by: Arve Knudsen --- promql/engine_test.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index c47ceb246..f431ab41e 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -21,6 +21,7 @@ import ( "os" "sort" "strconv" + "sync" "testing" "time" @@ -94,9 +95,14 @@ func TestQueryConcurrency(t *testing.T) { return nil } + var wg sync.WaitGroup for i := 0; i < maxConcurrency; i++ { q := engine.NewTestQuery(f) - go q.Exec(ctx) + wg.Add(1) + go func() { + q.Exec(ctx) + wg.Done() + }() select { case <-processing: // Expected. 
@@ -106,7 +112,11 @@ func TestQueryConcurrency(t *testing.T) { } q := engine.NewTestQuery(f) - go q.Exec(ctx) + wg.Add(1) + go func() { + q.Exec(ctx) + wg.Done() + }() select { case <-processing: @@ -129,6 +139,8 @@ func TestQueryConcurrency(t *testing.T) { for i := 0; i < maxConcurrency; i++ { block <- struct{}{} } + + wg.Wait() } // contextDone returns an error if the context was canceled or timed out. From 5ca56eeb6bc7fb17c143a0a1ac075e7bf9e3414b Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Thu, 16 May 2024 11:51:46 +0200 Subject: [PATCH 091/112] tsdb/index: Refactor Reader tests (#14071) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tsdb/index: Refactor Reader tests Co-authored-by: Björn Rabenstein Signed-off-by: Arve Knudsen --------- Signed-off-by: Arve Knudsen Co-authored-by: Björn Rabenstein --- tsdb/index/index_test.go | 218 +++++++++++++-------------------------- 1 file changed, 69 insertions(+), 149 deletions(-) diff --git a/tsdb/index/index_test.go b/tsdb/index/index_test.go index 22133d0b7..5c6d64e07 100644 --- a/tsdb/index/index_test.go +++ b/tsdb/index/index_test.go @@ -20,7 +20,9 @@ import ( "hash/crc32" "os" "path/filepath" + "slices" "sort" + "strconv" "testing" "github.com/stretchr/testify/require" @@ -160,39 +162,14 @@ func TestIndexRW_Create_Open(t *testing.T) { } func TestIndexRW_Postings(t *testing.T) { - dir := t.TempDir() ctx := context.Background() - - fn := filepath.Join(dir, indexFilename) - - iw, err := NewWriter(context.Background(), fn) - require.NoError(t, err) - - series := []labels.Labels{ - labels.FromStrings("a", "1", "b", "1"), - labels.FromStrings("a", "1", "b", "2"), - labels.FromStrings("a", "1", "b", "3"), - labels.FromStrings("a", "1", "b", "4"), + var input indexWriterSeriesSlice + for i := 1; i < 5; i++ { + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("a", "1", "b", strconv.Itoa(i)), + }) } - - require.NoError(t, iw.AddSymbol("1")) - require.NoError(t, iw.AddSymbol("2")) - require.NoError(t, iw.AddSymbol("3")) - require.NoError(t, iw.AddSymbol("4")) - require.NoError(t, iw.AddSymbol("a")) - require.NoError(t, iw.AddSymbol("b")) - - // Postings lists are only written if a series with the respective - // reference was added before. - require.NoError(t, iw.AddSeries(1, series[0])) - require.NoError(t, iw.AddSeries(2, series[1])) - require.NoError(t, iw.AddSeries(3, series[2])) - require.NoError(t, iw.AddSeries(4, series[3])) - - require.NoError(t, iw.Close()) - - ir, err := NewFileReader(fn) - require.NoError(t, err) + ir, fn, _ := createFileReader(ctx, t, input) p, err := ir.Postings(ctx, "a", "1") require.NoError(t, err) @@ -205,7 +182,7 @@ func TestIndexRW_Postings(t *testing.T) { require.NoError(t, err) require.Empty(t, c) - testutil.RequireEqual(t, series[i], builder.Labels()) + testutil.RequireEqual(t, input[i].labels, builder.Labels()) } require.NoError(t, p.Err()) @@ -240,8 +217,6 @@ func TestIndexRW_Postings(t *testing.T) { "b": {"1", "2", "3", "4"}, }, labelIndices) - require.NoError(t, ir.Close()) - t.Run("ShardedPostings()", func(t *testing.T) { ir, err := NewFileReader(fn) require.NoError(t, err) @@ -296,42 +271,16 @@ func TestIndexRW_Postings(t *testing.T) { } func TestPostingsMany(t *testing.T) { - dir := t.TempDir() ctx := context.Background() - - fn := filepath.Join(dir, indexFilename) - - iw, err := NewWriter(context.Background(), fn) - require.NoError(t, err) - // Create a label in the index which has 999 values. 
- symbols := map[string]struct{}{} - series := []labels.Labels{} + var input indexWriterSeriesSlice for i := 1; i < 1000; i++ { v := fmt.Sprintf("%03d", i) - series = append(series, labels.FromStrings("i", v, "foo", "bar")) - symbols[v] = struct{}{} + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("i", v, "foo", "bar"), + }) } - symbols["i"] = struct{}{} - symbols["foo"] = struct{}{} - symbols["bar"] = struct{}{} - syms := []string{} - for s := range symbols { - syms = append(syms, s) - } - sort.Strings(syms) - for _, s := range syms { - require.NoError(t, iw.AddSymbol(s)) - } - - for i, s := range series { - require.NoError(t, iw.AddSeries(storage.SeriesRef(i), s)) - } - require.NoError(t, iw.Close()) - - ir, err := NewFileReader(fn) - require.NoError(t, err) - defer func() { require.NoError(t, ir.Close()) }() + ir, _, symbols := createFileReader(ctx, t, input) cases := []struct { in []string @@ -387,25 +336,13 @@ func TestPostingsMany(t *testing.T) { } func TestPersistence_index_e2e(t *testing.T) { - dir := t.TempDir() ctx := context.Background() - lbls, err := labels.ReadLabels(filepath.Join("..", "testdata", "20kseries.json"), 20000) require.NoError(t, err) - // Sort labels as the index writer expects series in sorted order. sort.Sort(labels.Slice(lbls)) - symbols := map[string]struct{}{} - for _, lset := range lbls { - lset.Range(func(l labels.Label) { - symbols[l.Name] = struct{}{} - symbols[l.Value] = struct{}{} - }) - } - var input indexWriterSeriesSlice - ref := uint64(0) // Generate ChunkMetas for every label set. for i, lset := range lbls { @@ -426,17 +363,7 @@ func TestPersistence_index_e2e(t *testing.T) { }) } - iw, err := NewWriter(context.Background(), filepath.Join(dir, indexFilename)) - require.NoError(t, err) - - syms := []string{} - for s := range symbols { - syms = append(syms, s) - } - sort.Strings(syms) - for _, s := range syms { - require.NoError(t, iw.AddSymbol(s)) - } + ir, _, _ := createFileReader(ctx, t, input) // Population procedure as done by compaction. var ( @@ -447,8 +374,6 @@ func TestPersistence_index_e2e(t *testing.T) { mi := newMockIndex() for i, s := range input { - err = iw.AddSeries(storage.SeriesRef(i), s.labels, s.chunks...) - require.NoError(t, err) require.NoError(t, mi.AddSeries(storage.SeriesRef(i), s.labels, s.chunks...)) s.labels.Range(func(l labels.Label) { @@ -462,12 +387,6 @@ func TestPersistence_index_e2e(t *testing.T) { postings.Add(storage.SeriesRef(i), s.labels) } - err = iw.Close() - require.NoError(t, err) - - ir, err := NewFileReader(filepath.Join(dir, indexFilename)) - require.NoError(t, err) - for p := range mi.postings { gotp, err := ir.Postings(ctx, p.Name, p.Value) require.NoError(t, err) @@ -523,8 +442,6 @@ func TestPersistence_index_e2e(t *testing.T) { } sort.Strings(expSymbols) require.Equal(t, expSymbols, gotSymbols) - - require.NoError(t, ir.Close()) } func TestWriter_ShouldReturnErrorOnSeriesWithDuplicatedLabelNames(t *testing.T) { @@ -624,39 +541,14 @@ func BenchmarkReader_ShardedPostings(b *testing.B) { numShards = 16 ) - dir, err := os.MkdirTemp("", "benchmark_reader_sharded_postings") - require.NoError(b, err) - defer func() { - require.NoError(b, os.RemoveAll(dir)) - }() - ctx := context.Background() - - // Generate an index. 
- fn := filepath.Join(dir, indexFilename) - - iw, err := NewWriter(ctx, fn) - require.NoError(b, err) - + var input indexWriterSeriesSlice for i := 1; i <= numSeries; i++ { - require.NoError(b, iw.AddSymbol(fmt.Sprintf("%10d", i))) + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("const", fmt.Sprintf("%10d", 1), "unique", fmt.Sprintf("%10d", i)), + }) } - require.NoError(b, iw.AddSymbol("const")) - require.NoError(b, iw.AddSymbol("unique")) - - for i := 1; i <= numSeries; i++ { - require.NoError(b, iw.AddSeries(storage.SeriesRef(i), - labels.FromStrings("const", fmt.Sprintf("%10d", 1), "unique", fmt.Sprintf("%10d", i)))) - } - - require.NoError(b, iw.Close()) - - b.ResetTimer() - - // Create a reader to read back all postings from the index. - ir, err := NewFileReader(fn) - require.NoError(b, err) - + ir, _, _ := createFileReader(ctx, b, input) b.ResetTimer() for n := 0; n < b.N; n++ { @@ -721,28 +613,17 @@ func TestChunksTimeOrdering(t *testing.T) { } func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { - dir := t.TempDir() - - idx, err := NewWriter(context.Background(), filepath.Join(dir, "index")) - require.NoError(t, err) - - seriesCount := 1000 - for i := 1; i <= seriesCount; i++ { - require.NoError(t, idx.AddSymbol(fmt.Sprintf("%4d", i))) + const seriesCount = 1000 + var input indexWriterSeriesSlice + for i := 1; i < seriesCount; i++ { + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("__name__", fmt.Sprintf("%4d", i)), + chunks: []chunks.Meta{ + {Ref: 1, MinTime: 0, MaxTime: 10}, + }, + }) } - require.NoError(t, idx.AddSymbol("__name__")) - - for i := 1; i <= seriesCount; i++ { - require.NoError(t, idx.AddSeries(storage.SeriesRef(i), labels.FromStrings("__name__", fmt.Sprintf("%4d", i)), - chunks.Meta{Ref: 1, MinTime: 0, MaxTime: 10}, - )) - } - - require.NoError(t, idx.Close()) - - ir, err := NewFileReader(filepath.Join(dir, "index")) - require.NoError(t, err) - defer ir.Close() + ir, _, _ := createFileReader(context.Background(), t, input) failAfter := uint64(seriesCount / 2) // Fail after processing half of the series. ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} @@ -752,3 +633,42 @@ func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { require.Error(t, p.Err()) require.Equal(t, failAfter, ctx.Count()) } + +// createFileReader creates a temporary index file. It writes the provided input to this file. +// It returns a Reader for this file, the file's name, and the symbol map. 
+func createFileReader(ctx context.Context, tb testing.TB, input indexWriterSeriesSlice) (*Reader, string, map[string]struct{}) {
+	tb.Helper()
+
+	fn := filepath.Join(tb.TempDir(), indexFilename)
+
+	iw, err := NewWriter(ctx, fn)
+	require.NoError(tb, err)
+
+	symbols := map[string]struct{}{}
+	for _, s := range input {
+		s.labels.Range(func(l labels.Label) {
+			symbols[l.Name] = struct{}{}
+			symbols[l.Value] = struct{}{}
+		})
+	}
+
+	syms := []string{}
+	for s := range symbols {
+		syms = append(syms, s)
+	}
+	slices.Sort(syms)
+	for _, s := range syms {
+		require.NoError(tb, iw.AddSymbol(s))
+	}
+	for i, s := range input {
+		require.NoError(tb, iw.AddSeries(storage.SeriesRef(i), s.labels, s.chunks...))
+	}
+	require.NoError(tb, iw.Close())
+
+	ir, err := NewFileReader(fn)
+	require.NoError(tb, err)
+	tb.Cleanup(func() {
+		require.NoError(tb, ir.Close())
+	})
+	return ir, fn, symbols
+}

From 0fea1065feb7b6e93cf529a7ff33a40f4e8a10cd Mon Sep 17 00:00:00 2001
From: kushagra Shukla
Date: Sat, 18 May 2024 07:26:59 -0400
Subject: [PATCH 092/112] docs: note that "query.max-concurrency" may need to
 be adjusted when "rules.max-concurrent-evals" is set

Signed-off-by: kushagra Shukla
---
 cmd/prometheus/main.go          | 2 +-
 docs/command-line/prometheus.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go
index 8218ffb18..e250a95c8 100644
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@@ -418,7 +418,7 @@ func main() {
 serverOnlyFlag(a, "rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager.").
 Default("1m").SetValue(&cfg.resendDelay)
- serverOnlyFlag(a, "rules.max-concurrent-evals", "Global concurrency limit for independent rules that can run concurrently.").
+ serverOnlyFlag(a, "rules.max-concurrent-evals", "Global concurrency limit for independent rules that can run concurrently. When set, \"query.max-concurrency\" may need to be adjusted accordingly.").
 Default("4").Int64Var(&cfg.maxConcurrentEvals)
 a.Flag("scrape.adjust-timestamps", "Adjust scrape timestamps by up to `scrape.timestamp-tolerance` to align them to the intended schedule. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release.").
diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md
index 93eaf251d..aa9bf3bfb 100644
--- a/docs/command-line/prometheus.md
+++ b/docs/command-line/prometheus.md
@@ -48,7 +48,7 @@ The Prometheus monitoring server
 | --rules.alert.for-outage-tolerance | Max time to tolerate prometheus outage for restoring "for" state of alert. Use with server mode only. | `1h` |
 | --rules.alert.for-grace-period | Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period. Use with server mode only. | `10m` |
 | --rules.alert.resend-delay | Minimum amount of time to wait before resending an alert to Alertmanager. Use with server mode only. | `1m` |
-| --rules.max-concurrent-evals | Global concurrency limit for independent rules that can run concurrently. Use with server mode only. | `4` |
+| --rules.max-concurrent-evals | Global concurrency limit for independent rules that can run concurrently. When set, "query.max-concurrency" may need to be adjusted accordingly. Use with server mode only. | `4` |
| --alertmanager.notification-queue-capacity | The capacity of the queue for pending Alertmanager notifications.
Use with server mode only. | `10000` |
| --query.lookback-delta | The maximum lookback duration for retrieving metrics during expression evaluations and federation. Use with server mode only. | `5m` |
| --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` |

From 52f68a96a45b1c60eda9986656f332472babd047 Mon Sep 17 00:00:00 2001
From: George Krajcsovits
Date: Sat, 18 May 2024 16:12:33 +0200
Subject: [PATCH 093/112] web/api: export defaultStatsRenderer (#14121)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

defaultStatsRenderer->DefaultStatsRenderer

Add short docstrings.

I'd like to use the stats renderer to peek at the statistics in
https://github.com/grafana/mimir/pull/7966. However, I cannot call the
original function afterwards without this export.

Signed-off-by: György Krajcsovits
---
 web/api/v1/api.go | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/web/api/v1/api.go b/web/api/v1/api.go
index dc2236507..7bbf38a69 100644
--- a/web/api/v1/api.go
+++ b/web/api/v1/api.go
@@ -116,9 +116,11 @@ type RulesRetriever interface {
 AlertingRules() []*rules.AlertingRule
 }

+// StatsRenderer converts engine statistics into a format suitable for the API.
 type StatsRenderer func(context.Context, *stats.Statistics, string) stats.QueryStats

-func defaultStatsRenderer(_ context.Context, s *stats.Statistics, param string) stats.QueryStats {
+// DefaultStatsRenderer is the default stats renderer for the API.
+func DefaultStatsRenderer(_ context.Context, s *stats.Statistics, param string) stats.QueryStats {
 if param != "" {
 return stats.NewQueryStats(s)
 }
@@ -272,7 +274,7 @@ func NewAPI(
 buildInfo: buildInfo,
 gatherer: gatherer,
 isAgent: isAgent,
- statsRenderer: defaultStatsRenderer,
+ statsRenderer: DefaultStatsRenderer,
 remoteReadHandler: remote.NewReadHandler(logger, registerer, q, configFunc, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame),
 }
@@ -461,7 +463,7 @@ func (api *API) query(r *http.Request) (result apiFuncResult) {
 // Optional stats field in response if parameter "stats" is not empty.
 sr := api.statsRenderer
 if sr == nil {
- sr = defaultStatsRenderer
+ sr = DefaultStatsRenderer
 }
 qs := sr(ctx, qry.Stats(), r.FormValue("stats"))
@@ -563,7 +565,7 @@ func (api *API) queryRange(r *http.Request) (result apiFuncResult) {
 // Optional stats field in response if parameter "stats" is not empty.
 sr := api.statsRenderer
 if sr == nil {
- sr = defaultStatsRenderer
+ sr = DefaultStatsRenderer
 }
 qs := sr(ctx, qry.Stats(), r.FormValue("stats"))

From fe9cb5a803f69b1bad21b374e6fbd9084d6481d2 Mon Sep 17 00:00:00 2001
From: Oleg Zaytsev
Date: Tue, 21 May 2024 11:30:43 +0200
Subject: [PATCH 094/112] Check context every 128 labels instead of 100 (#14118)

Follow up on https://github.com/prometheus/prometheus/pull/14096

As promised, I bring a benchmark, which shows a very small improvement
if the context is checked every 128 label iterations instead of every
100. It's much easier for a computer to check modulo 128 than modulo
100. This is a very small 0-2% improvement, but I'd say this is one of
the hottest paths of the app, so this is still relevant.
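To make the trade-off concrete, here is a minimal, self-contained Go sketch of the pattern (the names are illustrative, not the actual tsdb code): a hot loop polls the context only once every N iterations, and because N is a power of two, the `i%N` test reduces to a bit mask.

```go
package main

import (
	"context"
	"fmt"
)

// checkContextEveryNIterations mirrors the constant tuned by this patch.
// 128 is a power of two, so i%128 is equivalent to i&127 for non-negative i,
// which is cheaper than a true division by 100.
const checkContextEveryNIterations = 128

// filterValues is a hypothetical stand-in for a tight label-matching loop.
func filterValues(ctx context.Context, values []string, match func(string) bool) ([]string, error) {
	var out []string
	for i, v := range values {
		// Poll for cancellation only every 128th iteration.
		if i&(checkContextEveryNIterations-1) == 0 && ctx.Err() != nil {
			return nil, ctx.Err()
		}
		if match(v) {
			out = append(out, v)
		}
	}
	return out, nil
}

func main() {
	vals := make([]string, 1_000_000)
	for i := range vals {
		vals[i] = fmt.Sprintf("val_%d", i)
	}
	got, err := filterValues(context.Background(), vals, func(s string) bool { return s == "val_42" })
	fmt.Println(got, err) // [val_42] <nil>
}
```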
Signed-off-by: Oleg Zaytsev --- tsdb/index/index.go | 2 +- tsdb/index/postings_test.go | 54 +++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 4ded4cbe2..480e6a8fc 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -53,7 +53,7 @@ const ( seriesByteAlign = 16 // checkContextEveryNIterations is used in some tight loops to check if the context is done. - checkContextEveryNIterations = 100 + checkContextEveryNIterations = 128 ) type indexWriterSeries struct { diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index 7fa0a892b..2cbc14ac6 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -22,8 +22,10 @@ import ( "math/rand" "sort" "strconv" + "strings" "testing" + "github.com/grafana/regexp" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" @@ -1284,6 +1286,58 @@ func BenchmarkListPostings(b *testing.B) { } } +func slowRegexpString() string { + nums := map[int]struct{}{} + for i := 10_000; i < 20_000; i++ { + if i%3 == 0 { + nums[i] = struct{}{} + } + } + + var sb strings.Builder + sb.WriteString(".*(9999") + for i := range nums { + sb.WriteString("|") + sb.WriteString(strconv.Itoa(i)) + } + sb.WriteString(").*") + return sb.String() +} + +func BenchmarkMemPostings_PostingsForLabelMatching(b *testing.B) { + fast := regexp.MustCompile("^(100|200)$") + slowRegexp := "^" + slowRegexpString() + "$" + b.Logf("Slow regexp length = %d", len(slowRegexp)) + slow := regexp.MustCompile(slowRegexp) + + for _, labelValueCount := range []int{1_000, 10_000, 100_000} { + b.Run(fmt.Sprintf("labels=%d", labelValueCount), func(b *testing.B) { + mp := NewMemPostings() + for i := 0; i < labelValueCount; i++ { + mp.Add(storage.SeriesRef(i), labels.FromStrings("label", strconv.Itoa(i))) + } + + fp, err := ExpandPostings(mp.PostingsForLabelMatching(context.Background(), "label", fast.MatchString)) + require.NoError(b, err) + b.Logf("Fast matcher matches %d series", len(fp)) + b.Run("matcher=fast", func(b *testing.B) { + for i := 0; i < b.N; i++ { + mp.PostingsForLabelMatching(context.Background(), "label", fast.MatchString).Next() + } + }) + + sp, err := ExpandPostings(mp.PostingsForLabelMatching(context.Background(), "label", slow.MatchString)) + require.NoError(b, err) + b.Logf("Slow matcher matches %d series", len(sp)) + b.Run("matcher=slow", func(b *testing.B) { + for i := 0; i < b.N; i++ { + mp.PostingsForLabelMatching(context.Background(), "label", slow.MatchString).Next() + } + }) + }) + } +} + func TestMemPostings_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { memP := NewMemPostings() seriesCount := 10 * checkContextEveryNIterations From 3127a4029ea5e860c5b93441ecf5122f6cf82df0 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 21 May 2024 14:44:55 +0200 Subject: [PATCH 095/112] doc: Clarify the limits of dumping/backfilling via OpenMetrics This is about native histograms (not yet supported) and staleness markers (for which OpenMetrics support isn't even planned). 
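As a small illustration of why stale markers cannot survive this round trip: Prometheus marks staleness with a dedicated NaN bit pattern, and any text format that prints the sample value erases exactly those bits. The constant below is taken from the model/value package and should be treated as an assumption for this sketch.

```go
package main

import (
	"fmt"
	"math"
	"strconv"
)

// staleNaNBits is assumed to be the NaN payload Prometheus uses internally
// to mark a series as stale (see the model/value package).
const staleNaNBits uint64 = 0x7ff0000000000002

func main() {
	stale := math.Float64frombits(staleNaNBits)
	plain := math.NaN()

	// Rendered as text, a stale marker and an ordinary NaN are identical...
	fmt.Println(strconv.FormatFloat(stale, 'g', -1, 64)) // NaN
	fmt.Println(strconv.FormatFloat(plain, 'g', -1, 64)) // NaN

	// ...only the underlying bits differ, and those are lost in a text dump.
	fmt.Printf("stale: %#x\nplain: %#x\n", math.Float64bits(stale), math.Float64bits(plain))
}
```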
Signed-off-by: beorn7 --- cmd/promtool/main.go | 2 +- docs/command-line/promtool.md | 2 +- docs/storage.md | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 40d933376..e1d275e97 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -240,7 +240,7 @@ func main() { dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64() dumpMatch := tsdbDumpCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings() - tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics format. Native histograms are not dumped.") + tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics text format, excluding native histograms and staleness markers, which are not representable in OpenMetrics.") dumpOpenMetricsPath := tsdbDumpOpenMetricsCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String() dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String() dumpOpenMetricsMinTime := tsdbDumpOpenMetricsCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64() diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index 9ed51fb7c..443cd3f0c 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -585,7 +585,7 @@ Dump samples from a TSDB. ##### `promtool tsdb dump-openmetrics` -[Experimental] Dump samples from a TSDB into OpenMetrics format. Native histograms are not dumped. +[Experimental] Dump samples from a TSDB into OpenMetrics text format, excluding native histograms and staleness markers, which are not representable in OpenMetrics. diff --git a/docs/storage.md b/docs/storage.md index 46bb7210e..b66f2062a 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -197,6 +197,9 @@ or time-series database to Prometheus. To do so, the user must first convert the source data into [OpenMetrics](https://openmetrics.io/) format, which is the input format for the backfilling as described below. +Note that native histograms and staleness markers are not supported by this +procedure, as they cannot be represented in the OpenMetrics format. + ### Usage Backfilling can be used via the Promtool command line. 
Promtool will write the blocks

From fabcd7e7c6db57ebd5456d7f5239f22cde37ba32 Mon Sep 17 00:00:00 2001
From: Ayoub Mrini
Date: Tue, 21 May 2024 19:07:29 +0200
Subject: [PATCH 096/112] fix(api): Send warnings only if the limit is really
 exceeded (#14116)

for the series, label names, and label values APIs

Add warnings count check to TestEndpoints

The limit param was added in https://github.com/prometheus/prometheus/pull/13396

Signed-off-by: machine424
---
 web/api/v1/api.go      |  7 ++++---
 web/api/v1/api_test.go | 40 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/web/api/v1/api.go b/web/api/v1/api.go
index 7bbf38a69..f0884926e 100644
--- a/web/api/v1/api.go
+++ b/web/api/v1/api.go
@@ -704,7 +704,7 @@ func (api *API) labelNames(r *http.Request) apiFuncResult {
 names = []string{}
 }

- if len(names) >= limit {
+ if len(names) > limit {
 names = names[:limit]
 warnings = warnings.Add(errors.New("results truncated due to limit"))
 }
@@ -793,7 +793,7 @@ func (api *API) labelValues(r *http.Request) (result apiFuncResult) {

 slices.Sort(vals)

- if len(vals) >= limit {
+ if len(vals) > limit {
 vals = vals[:limit]
 warnings = warnings.Add(errors.New("results truncated due to limit"))
 }
@@ -889,7 +889,8 @@ func (api *API) series(r *http.Request) (result apiFuncResult) {
 }
 metrics = append(metrics, set.At().Labels())

- if len(metrics) >= limit {
+ if len(metrics) > limit {
+ metrics = metrics[:limit]
 warnings.Add(errors.New("results truncated due to limit"))
 return apiFuncResult{metrics, nil, warnings, closer}
 }
diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go
index 7d55dd11a..74cd2239d 100644
--- a/web/api/v1/api_test.go
+++ b/web/api/v1/api_test.go
@@ -1060,6 +1060,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
 responseLen int // If nonzero, check only the length; `response` is ignored.
 responseMetadataTotal int
 responseAsJSON string
+ warningsCount int
 errType errorType
 sorter func(interface{})
 metadata []targetMetadata
@@ -1417,7 +1418,17 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
 "match[]": []string{"test_metric1"},
 "limit": []string{"1"},
 },
- responseLen: 1, // API does not specify which particular value will come back.
+ responseLen: 1, // API does not specify which particular value will come back.
+ warningsCount: 1,
+ },
+ {
+ endpoint: api.series,
+ query: url.Values{
+ "match[]": []string{"test_metric1"},
+ "limit": []string{"2"},
+ },
+ responseLen: 2, // API does not specify which particular value will come back.
+ warningsCount: 0, // No warnings if limit isn't exceeded.
 },
 // Missing match[] query params in series requests.
 {
@@ -2700,7 +2711,19 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E
 query: url.Values{
 "limit": []string{"2"},
 },
- responseLen: 2, // API does not specify which particular values will come back.
+ responseLen: 2, // API does not specify which particular values will come back.
+ warningsCount: 1,
+ },
+ {
+ endpoint: api.labelValues,
+ params: map[string]string{
+ "name": "__name__",
+ },
+ query: url.Values{
+ "limit": []string{"4"},
+ },
+ responseLen: 4, // API does not specify which particular values will come back.
+ warningsCount: 0, // No warnings if limit isn't exceeded.
 },
 // Label names.
{ @@ -2847,7 +2870,16 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E query: url.Values{ "limit": []string{"2"}, }, - responseLen: 2, // API does not specify which particular values will come back. + responseLen: 2, // API does not specify which particular values will come back. + warningsCount: 1, + }, + { + endpoint: api.labelNames, + query: url.Values{ + "limit": []string{"3"}, + }, + responseLen: 3, // API does not specify which particular values will come back. + warningsCount: 0, // No warnings if limit isn't exceeded. }, }...) } @@ -2924,6 +2956,8 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E require.NoError(t, err) require.JSONEq(t, test.responseAsJSON, string(s)) } + + require.Len(t, res.warnings, test.warningsCount) }) } }) From 2d2b440304d0ec599bf19b102724aab67650b91e Mon Sep 17 00:00:00 2001 From: Jayapriya Pai Date: Tue, 21 May 2024 22:38:35 +0530 Subject: [PATCH 097/112] fix: correct the typo in azuread sdk auth (#14106) Signed-off-by: Jayapriya Pai --- storage/remote/azuread/azuread.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/remote/azuread/azuread.go b/storage/remote/azuread/azuread.go index e2058fb54..58520c6a5 100644 --- a/storage/remote/azuread/azuread.go +++ b/storage/remote/azuread/azuread.go @@ -75,7 +75,7 @@ type AzureADConfig struct { //nolint:revive // exported. // OAuth is the oauth config that is being used to authenticate. OAuth *OAuthConfig `yaml:"oauth,omitempty"` - // OAuth is the oauth config that is being used to authenticate. + // SDK is the SDK config that is being used to authenticate. SDK *SDKConfig `yaml:"sdk,omitempty"` // Cloud is the Azure cloud in which the service is running. Example: AzurePublic/AzureGovernment/AzureChina. From 5be753f17731b1d37646737d5e0c8fe73d2b67d2 Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Fri, 24 May 2024 09:15:00 +0300 Subject: [PATCH 098/112] .gitpod.Dockerfile: Auto-fetch Go and goyacc vers In this commit we auto-fetch Go version from go.mod and goyacc version from Makefile in the Prometheus repo. Signed-off-by: Mohamed Awnallah --- .gitpod.Dockerfile | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/.gitpod.Dockerfile b/.gitpod.Dockerfile index d645db5de..2370ec5f5 100644 --- a/.gitpod.Dockerfile +++ b/.gitpod.Dockerfile @@ -1,15 +1,33 @@ FROM gitpod/workspace-full +# Set Node.js version as an environment variable. ENV CUSTOM_NODE_VERSION=16 -ENV CUSTOM_GO_VERSION=1.19 -ENV GOPATH=$HOME/go-packages -ENV GOROOT=$HOME/go -ENV PATH=$GOROOT/bin:$GOPATH/bin:$PATH +# Install and use the specified Node.js version via nvm. RUN bash -c ". .nvm/nvm.sh && nvm install ${CUSTOM_NODE_VERSION} && nvm use ${CUSTOM_NODE_VERSION} && nvm alias default ${CUSTOM_NODE_VERSION}" +# Ensure nvm uses the default Node.js version in all new shells. RUN echo "nvm use default &>/dev/null" >> ~/.bashrc.d/51-nvm-fix -RUN curl -fsSL https://dl.google.com/go/go${GO_VERSION}.linux-amd64.tar.gz | tar xzs \ - && printf '%s\n' 'export GOPATH=/workspace/go' \ - 'export PATH=$GOPATH/bin:$PATH' > $HOME/.bashrc.d/300-go +# Remove any existing Go installation in $HOME path. +RUN rm -rf $HOME/go $HOME/go-packages + +# Export go environment variables. 
+RUN echo "export GOPATH=/workspace/go" >> ~/.bashrc.d/300-go && \ + echo "export GOBIN=\$GOPATH/bin" >> ~/.bashrc.d/300-go && \ + echo "export GOROOT=${HOME}/go" >> ~/.bashrc.d/300-go && \ + echo "export PATH=\$GOROOT/bin:\$GOBIN:\$PATH" >> ~/.bashrc + +# Reload the environment variables to ensure go environment variables are +# available in subsequent commands. +RUN bash -c "source ~/.bashrc && source ~/.bashrc.d/300-go" + +# Fetch the Go version dynamically from the Prometheus go.mod file and Install Go in $HOME path. +RUN export CUSTOM_GO_VERSION=$(curl -sSL "https://raw.githubusercontent.com/prometheus/prometheus/main/go.mod" | awk '/^go/{print $2".0"}') && \ + curl -fsSL "https://dl.google.com/go/go${CUSTOM_GO_VERSION}.linux-amd64.tar.gz" | \ + tar -xz -C $HOME + +# Fetch the goyacc parser version dynamically from the Prometheus Makefile +# and install it globally in $GOBIN path. +RUN GOYACC_VERSION=$(curl -fsSL "https://raw.githubusercontent.com/prometheus/prometheus/main/Makefile" | awk -F'=' '/GOYACC_VERSION \?=/{gsub(/ /, "", $2); print $2}') && \ + go install "golang.org/x/tools/cmd/goyacc@${GOYACC_VERSION}" From 8894d65cd6135635c4ac9cf100464e6aacd29593 Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Fri, 24 May 2024 19:43:21 -0700 Subject: [PATCH 099/112] Fix head stats and hooks when replaying a corrupted snapshot (#14079) * Fixing head stats and hooks when replaying a corrupted snapshot Signed-off-by: alanprot * Fixing create/removed series metrics Signed-off-by: alanprot * Refactoring to have common code between gc and flush method Signed-off-by: alanprot * Update tsdb/head.go Co-authored-by: Ayoub Mrini Signed-off-by: Alan Protasio * refactor Signed-off-by: alanprot * Update tsdb/head_test.go Co-authored-by: Ganesh Vernekar Signed-off-by: Alan Protasio * Update tsdb/head_test.go Co-authored-by: Ganesh Vernekar Signed-off-by: Alan Protasio --------- Signed-off-by: alanprot Signed-off-by: Alan Protasio Co-authored-by: Ayoub Mrini Co-authored-by: Ganesh Vernekar --- tsdb/head.go | 74 +++++++++++++++++++++++++++++------------------ tsdb/head_test.go | 48 ++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 28 deletions(-) diff --git a/tsdb/head.go b/tsdb/head.go index 8b3d9787c..d5f7144fd 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -310,12 +310,22 @@ func (h *Head) resetInMemoryState() error { return err } + if h.series != nil { + // reset the existing series to make sure we call the appropriated hooks + // and increment the series removed metrics + fs := h.series.iterForDeletion(func(_ int, _ uint64, s *memSeries, flushedForCallback map[chunks.HeadSeriesRef]labels.Labels) { + // All series should be flushed + flushedForCallback[s.ref] = s.lset + }) + h.metrics.seriesRemoved.Add(float64(fs)) + } + + h.series = newStripeSeries(h.opts.StripeSize, h.opts.SeriesCallback) h.iso = newIsolation(h.opts.IsolationDisabled) h.oooIso = newOOOIsolation() - + h.numSeries.Store(0) h.exemplarMetrics = em h.exemplars = es - h.series = newStripeSeries(h.opts.StripeSize, h.opts.SeriesCallback) h.postings = index.NewUnorderedMemPostings() h.tombstones = tombstones.NewMemTombstones() h.deleted = map[chunks.HeadSeriesRef]int{} @@ -1861,11 +1871,10 @@ func newStripeSeries(stripeSize int, seriesCallback SeriesLifecycleCallback) *st // minMmapFile is the min mmap file number seen in the series (in-order and out-of-order) after gc'ing the series. 
func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) (_ map[storage.SeriesRef]struct{}, _ int, _, _ int64, minMmapFile int) { var ( - deleted = map[storage.SeriesRef]struct{}{} - rmChunks = 0 - actualMint int64 = math.MaxInt64 - minOOOTime int64 = math.MaxInt64 - deletedFromPrevStripe = 0 + deleted = map[storage.SeriesRef]struct{}{} + rmChunks = 0 + actualMint int64 = math.MaxInt64 + minOOOTime int64 = math.MaxInt64 ) minMmapFile = math.MaxInt32 @@ -1923,27 +1932,7 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) ( deletedForCallback[series.ref] = series.lset } - // Run through all series shard by shard, checking which should be deleted. - for i := 0; i < s.size; i++ { - deletedForCallback := make(map[chunks.HeadSeriesRef]labels.Labels, deletedFromPrevStripe) - s.locks[i].Lock() - - // Delete conflicts first so seriesHashmap.del doesn't move them to the `unique` field, - // after deleting `unique`. - for hash, all := range s.hashes[i].conflicts { - for _, series := range all { - check(i, hash, series, deletedForCallback) - } - } - for hash, series := range s.hashes[i].unique { - check(i, hash, series, deletedForCallback) - } - - s.locks[i].Unlock() - - s.seriesLifecycleCallback.PostDeletion(deletedForCallback) - deletedFromPrevStripe = len(deletedForCallback) - } + s.iterForDeletion(check) if actualMint == math.MaxInt64 { actualMint = mint @@ -1952,6 +1941,35 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) ( return deleted, rmChunks, actualMint, minOOOTime, minMmapFile } +// The iterForDeletion function iterates through all series, invoking the checkDeletedFunc for each. +// The checkDeletedFunc takes a map as input and should add to it all series that were deleted and should be included +// when invoking the PostDeletion hook. +func (s *stripeSeries) iterForDeletion(checkDeletedFunc func(int, uint64, *memSeries, map[chunks.HeadSeriesRef]labels.Labels)) int { + seriesSetFromPrevStripe := 0 + totalDeletedSeries := 0 + // Run through all series shard by shard + for i := 0; i < s.size; i++ { + seriesSet := make(map[chunks.HeadSeriesRef]labels.Labels, seriesSetFromPrevStripe) + s.locks[i].Lock() + // Iterate conflicts first so f doesn't move them to the `unique` field, + // after deleting `unique`. + for hash, all := range s.hashes[i].conflicts { + for _, series := range all { + checkDeletedFunc(i, hash, series, seriesSet) + } + } + + for hash, series := range s.hashes[i].unique { + checkDeletedFunc(i, hash, series, seriesSet) + } + s.locks[i].Unlock() + s.seriesLifecycleCallback.PostDeletion(seriesSet) + totalDeletedSeries += len(seriesSet) + seriesSetFromPrevStripe = len(seriesSet) + } + return totalDeletedSeries +} + func (s *stripeSeries) getByID(id chunks.HeadSeriesRef) *memSeries { i := uint64(id) & uint64(s.size-1) diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 804886ad7..6b4ec4ca4 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -4007,6 +4007,9 @@ func TestSnapshotError(t *testing.T) { require.NoError(t, err) f, err := os.OpenFile(path.Join(snapDir, files[0].Name()), os.O_RDWR, 0) require.NoError(t, err) + // Create snapshot backup to be restored on future test cases. + snapshotBackup, err := io.ReadAll(f) + require.NoError(t, err) _, err = f.WriteAt([]byte{0b11111111}, 18) require.NoError(t, err) require.NoError(t, f.Close()) @@ -4021,10 +4024,44 @@ func TestSnapshotError(t *testing.T) { // There should be no series in the memory after snapshot error since WAL was removed. 
require.Equal(t, 1.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal)) + require.Equal(t, uint64(0), head.NumSeries()) require.Nil(t, head.series.getByHash(lbls.Hash(), lbls)) tm, err = head.tombstones.Get(1) require.NoError(t, err) require.Empty(t, tm) + require.NoError(t, head.Close()) + + // Test corruption in the middle of the snapshot. + f, err = os.OpenFile(path.Join(snapDir, files[0].Name()), os.O_RDWR, 0) + require.NoError(t, err) + _, err = f.WriteAt(snapshotBackup, 0) + require.NoError(t, err) + _, err = f.WriteAt([]byte{0b11111111}, 300) + require.NoError(t, err) + require.NoError(t, f.Close()) + + c := &countSeriesLifecycleCallback{} + opts := head.opts + opts.SeriesCallback = c + + w, err = wlog.NewSize(nil, nil, head.wal.Dir(), 32768, wlog.CompressionNone) + require.NoError(t, err) + head, err = NewHead(prometheus.NewRegistry(), nil, w, nil, head.opts, nil) + require.NoError(t, err) + require.NoError(t, head.Init(math.MinInt64)) + + // There should be no series in the memory after snapshot error since WAL was removed. + require.Equal(t, 1.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal)) + require.Nil(t, head.series.getByHash(lbls.Hash(), lbls)) + require.Equal(t, uint64(0), head.NumSeries()) + + // Since the snapshot could replay certain series, we continue invoking the create hooks. + // In such instances, we need to ensure that we also trigger the delete hooks when resetting the memory. + require.Equal(t, int64(2), c.created.Load()) + require.Equal(t, int64(2), c.deleted.Load()) + + require.Equal(t, 2.0, prom_testutil.ToFloat64(head.metrics.seriesRemoved)) + require.Equal(t, 2.0, prom_testutil.ToFloat64(head.metrics.seriesCreated)) } func TestHistogramMetrics(t *testing.T) { @@ -5829,3 +5866,14 @@ func TestHeadCompactableDoesNotCompactEmptyHead(t *testing.T) { require.False(t, head.compactable()) } + +type countSeriesLifecycleCallback struct { + created atomic.Int64 + deleted atomic.Int64 +} + +func (c *countSeriesLifecycleCallback) PreCreation(labels.Labels) error { return nil } +func (c *countSeriesLifecycleCallback) PostCreation(labels.Labels) { c.created.Inc() } +func (c *countSeriesLifecycleCallback) PostDeletion(s map[chunks.HeadSeriesRef]labels.Labels) { + c.deleted.Add(int64(len(s))) +} From f3b8750339d65fc25714c8f92f8afacefb6a727d Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Mon, 27 May 2024 17:14:17 +0200 Subject: [PATCH 100/112] Join errors Signed-off-by: Arve Knudsen --- promql/query_logger.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/promql/query_logger.go b/promql/query_logger.go index 76528f958..7e06ebb97 100644 --- a/promql/query_logger.go +++ b/promql/query_logger.go @@ -16,6 +16,7 @@ package promql import ( "context" "encoding/json" + "errors" "fmt" "io" "os" @@ -93,14 +94,14 @@ type mmapedFile struct { func (f *mmapedFile) Close() error { err := f.m.Unmap() - if fErr := f.f.Close(); fErr != nil && err == nil { - return fmt.Errorf("close mmapedFile.f: %w", fErr) + if err != nil { + err = fmt.Errorf("mmapedFile: unmapping: %w", err) + } + if fErr := f.f.Close(); fErr != nil { + return errors.Join(fmt.Errorf("close mmapedFile.f: %w", fErr), err) } - if err != nil { - return fmt.Errorf("mmapedFile: unmapping: %w", err) - } - return nil + return err } func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io.Closer, error) { From e6f1f7e32db72df1cf42bedc4df087d99e1f39d3 Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Mon, 27 May 2024 18:25:02 +0200 Subject: 
[PATCH 101/112] docs/configuration: clarify OpenStack metadata labels
 (#14149)

On several occasions, users assumed that the `__meta_openstack_tag_`
labels were about tags [1] instead of metadata [2]. While we can't
really change the Prometheus label name, we can at least clarify in the
documentation what information the label carries.

[1] https://specs.openstack.org/openstack/api-wg/guidelines/tags.html
[2] https://docs.openstack.org/api-ref/compute/#server-metadata-servers-metadata

Signed-off-by: Simon Pasquier
---
 docs/configuration/configuration.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md
index 61e86988e..dedd7a0f7 100644
--- a/docs/configuration/configuration.md
+++ b/docs/configuration/configuration.md
@@ -1349,7 +1349,7 @@ interface.
 The following meta labels are available on targets during [relabeling](#relabel_config):

 * `__meta_openstack_address_pool`: the pool of the private IP.
-* `__meta_openstack_instance_flavor`: the flavor of the OpenStack instance.
+* `__meta_openstack_instance_flavor`: the flavor ID of the OpenStack instance.
 * `__meta_openstack_instance_id`: the OpenStack instance ID.
 * `__meta_openstack_instance_image`: the ID of the image the OpenStack instance is using.
 * `__meta_openstack_instance_name`: the OpenStack instance name.
@@ -1357,7 +1357,7 @@ The following meta labels are available on targets during [relabel_config):
 * `__meta_openstack_private_ip`: the private IP of the OpenStack instance.
 * `__meta_openstack_project_id`: the project (tenant) owning this instance.
 * `__meta_openstack_public_ip`: the public IP of the OpenStack instance.
-* `__meta_openstack_tag_`: each tag value of the instance.
+* `__meta_openstack_tag_`: each metadata item of the instance, with any unsupported characters converted to an underscore.
 * `__meta_openstack_user_id`: the user account owning the tenant.

 See below for the configuration options for OpenStack discovery:

From b2396c0c8f6420bbd0681d4500c33851e75681f3 Mon Sep 17 00:00:00 2001
From: Arve Knudsen
Date: Mon, 27 May 2024 21:57:45 +0200
Subject: [PATCH 102/112] Upgrade to golangci-lint v1.59.0

Signed-off-by: Arve Knudsen
---
 .github/workflows/ci.yml         |  2 +-
 .golangci.yml                    |  2 +-
 Makefile.common                  |  2 +-
 cmd/prometheus/query_log_test.go |  4 ++--
 scripts/golangci-lint.yml        |  2 +-
 tsdb/chunks/queue_test.go        |  2 +-
 tsdb/db_test.go                  | 10 +++++-----
 tsdb/head_test.go                |  4 ++--
 tsdb/wlog/checkpoint_test.go     |  2 +-
 web/api/v1/api_test.go           |  2 ++
 10 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cead7abfd..6d14b9324 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -175,7 +175,7 @@ jobs:
 with:
 args: --verbose
 # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml.
- version: v1.56.2 + version: v1.59.0 fuzzing: uses: ./.github/workflows/fuzzing.yml if: github.event_name == 'pull_request' diff --git a/.golangci.yml b/.golangci.yml index c63184877..f81b29ed2 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -47,7 +47,7 @@ issues: source: "^// ===" - linters: - perfsprint - text: "fmt.Sprintf can be replaced with string addition" + text: "fmt.Sprintf can be replaced with string concatenation" linters-settings: depguard: rules: diff --git a/Makefile.common b/Makefile.common index 0e9ace29b..161729235 100644 --- a/Makefile.common +++ b/Makefile.common @@ -61,7 +61,7 @@ PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_ SKIP_GOLANGCI_LINT := GOLANGCI_LINT := GOLANGCI_LINT_OPTS ?= -GOLANGCI_LINT_VERSION ?= v1.56.2 +GOLANGCI_LINT_VERSION ?= v1.59.0 # golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64. # windows isn't included here because of the path separator being different. ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin)) diff --git a/cmd/prometheus/query_log_test.go b/cmd/prometheus/query_log_test.go index dd6b56672..9a7a3ed85 100644 --- a/cmd/prometheus/query_log_test.go +++ b/cmd/prometheus/query_log_test.go @@ -296,7 +296,7 @@ func (p *queryLogTest) run(t *testing.T) { if p.exactQueryCount() { require.Equal(t, 1, qc) } else { - require.Greater(t, qc, 0, "no queries logged") + require.Positive(t, qc, "no queries logged") } p.validateLastQuery(t, ql) @@ -366,7 +366,7 @@ func (p *queryLogTest) run(t *testing.T) { if p.exactQueryCount() { require.Equal(t, 1, qc) } else { - require.Greater(t, qc, 0, "no queries logged") + require.Positive(t, qc, "no queries logged") } } diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index b36f71c3c..da99a131e 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -35,4 +35,4 @@ jobs: - name: Lint uses: golangci/golangci-lint-action@9d1e0624a798bb64f6c3cea93db47765312263dc # v5.1.0 with: - version: v1.56.2 + version: v1.59.0 diff --git a/tsdb/chunks/queue_test.go b/tsdb/chunks/queue_test.go index 5756e4585..9f761a5f3 100644 --- a/tsdb/chunks/queue_test.go +++ b/tsdb/chunks/queue_test.go @@ -55,7 +55,7 @@ func (q *writeJobQueue) assertInvariants(t *testing.T) { require.Len(t, s.segment, s.nextWrite) } // Last segment must have at least one element, or we wouldn't have created it. - require.Greater(t, s.nextWrite, 0) + require.Positive(t, s.nextWrite) } require.Equal(t, q.size, totalSize) diff --git a/tsdb/db_test.go b/tsdb/db_test.go index f0b27dcc2..5965e5317 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -4574,7 +4574,7 @@ func TestOOOCompaction(t *testing.T) { ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) require.NoError(t, err) require.False(t, created) - require.Greater(t, ms.ooo.oooHeadChunk.chunk.NumSamples(), 0) + require.Positive(t, ms.ooo.oooHeadChunk.chunk.NumSamples()) require.Len(t, ms.ooo.oooMmappedChunks, 14) // 7 original, 7 duplicate. } checkNonEmptyOOOChunk(series1) @@ -4715,7 +4715,7 @@ func TestOOOCompactionWithNormalCompaction(t *testing.T) { ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) require.NoError(t, err) require.False(t, created) - require.Greater(t, ms.ooo.oooHeadChunk.chunk.NumSamples(), 0) + require.Positive(t, ms.ooo.oooHeadChunk.chunk.NumSamples()) } // If the normal Head is not compacted, the OOO head compaction does not take place. 
@@ -4816,7 +4816,7 @@ func TestOOOCompactionWithDisabledWriteLog(t *testing.T) { ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) require.NoError(t, err) require.False(t, created) - require.Greater(t, ms.ooo.oooHeadChunk.chunk.NumSamples(), 0) + require.Positive(t, ms.ooo.oooHeadChunk.chunk.NumSamples()) } // If the normal Head is not compacted, the OOO head compaction does not take place. @@ -5517,8 +5517,8 @@ func TestWBLAndMmapReplay(t *testing.T) { addedRecs++ require.NoError(t, newWbl.Log(rec)) } - require.Greater(t, markers, 0) - require.Greater(t, addedRecs, 0) + require.Positive(t, markers) + require.Positive(t, addedRecs) require.NoError(t, newWbl.Close()) require.NoError(t, sr.Close()) require.NoError(t, os.RemoveAll(wblDir)) diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 6b4ec4ca4..bb437ab59 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -3557,7 +3557,7 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { expMmapChunks = append(expMmapChunks, &cpy) } expHeadChunkSamples := ms.headChunks.chunk.NumSamples() - require.Greater(t, expHeadChunkSamples, 0) + require.Positive(t, expHeadChunkSamples) // Series with mix of histograms and float. s2 := labels.FromStrings("a", "b2") @@ -4638,7 +4638,7 @@ func TestChunkSnapshotTakenAfterIncompleteSnapshot(t *testing.T) { require.NoError(t, err) require.NotEqual(t, "", name) require.Equal(t, 0, idx) - require.Greater(t, offset, 0) + require.Positive(t, offset) } // TestWBLReplay checks the replay at a low level. diff --git a/tsdb/wlog/checkpoint_test.go b/tsdb/wlog/checkpoint_test.go index ea11c5dd7..a9786454d 100644 --- a/tsdb/wlog/checkpoint_test.go +++ b/tsdb/wlog/checkpoint_test.go @@ -251,7 +251,7 @@ func TestCheckpoint(t *testing.T) { require.NoError(t, w.Truncate(107)) require.NoError(t, DeleteCheckpoints(w.Dir(), 106)) require.Equal(t, histogramsInWAL+floatHistogramsInWAL+samplesInWAL, stats.TotalSamples) - require.Greater(t, stats.DroppedSamples, 0) + require.Positive(t, stats.DroppedSamples) // Only the new checkpoint should be left. 
files, err := os.ReadDir(dir)
diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go
index 74cd2239d..b30890893 100644
--- a/web/api/v1/api_test.go
+++ b/web/api/v1/api_test.go
@@ -2973,8 +2973,10 @@ func assertAPIError(t *testing.T, got *apiError, exp errorType) {
 t.Helper()

 if exp == errorNone {
+ //nolint:testifylint
 require.Nil(t, got)
 } else {
+ //nolint:testifylint
 require.NotNil(t, got)
 require.Equal(t, exp, got.typ, "(%q)", got)
 }

From 013998fa7fca99b210c0b6cf13f9cea0d33f62b6 Mon Sep 17 00:00:00 2001
From: Matthieu MOREL
Date: Tue, 28 May 2024 10:06:08 +0200
Subject: [PATCH 103/112] Bump golangci-lint action (#14154)

* Bump golangci-lint action to 6.0.1

* Synchronize scripts/golangci-lint.yml and workflows/ci.yml

Signed-off-by: Matthieu MOREL
---
 .github/workflows/ci.yml  | 5 ++---
 scripts/golangci-lint.yml | 7 ++++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6d14b9324..a56140c19 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -161,17 +161,16 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout repository
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
+ uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
 - name: Install Go
 uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
 with:
- cache: false
 go-version: 1.22.x
 - name: Install snmp_exporter/generator dependencies
 run: sudo apt-get update && sudo apt-get -y install libsnmp-dev
 if: github.repository == 'prometheus/snmp_exporter'
 - name: Lint
- uses: golangci/golangci-lint-action@3cfe3a4abbb849e10058ce4af15d205b6da42804 # v4.0.0
+ uses: golangci/golangci-lint-action@a4f60bb28d35aeee14e6880718e0c85ff1882e64 # v6.0.1
 with:
 args: --verbose
 # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml.
diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml
index da99a131e..5ceb59099 100644
--- a/scripts/golangci-lint.yml
+++ b/scripts/golangci-lint.yml
@@ -24,8 +24,8 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout repository
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2
- - name: install Go
+ uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+ - name: Install Go
 uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
 with:
 go-version: 1.22.x
@@ -33,6 +33,7 @@ jobs:
 run: sudo apt-get update && sudo apt-get -y install libsnmp-dev
 if: github.repository == 'prometheus/snmp_exporter'
 - name: Lint
- uses: golangci/golangci-lint-action@9d1e0624a798bb64f6c3cea93db47765312263dc # v5.1.0
+ uses: golangci/golangci-lint-action@a4f60bb28d35aeee14e6880718e0c85ff1882e64 # v6.0.1
 with:
+ args: --verbose
 version: v1.59.0

From 25b0991c3d8485f5c8f8af0023d6ce46921745b2 Mon Sep 17 00:00:00 2001
From: SuperQ
Date: Mon, 27 May 2024 14:43:39 +0200
Subject: [PATCH 104/112] Enable additional Go metrics

Enable some additional Go runtime metrics in order to observe additional
performance data.

Enables a number of new metrics:
```
HELP go_gc_cycles_automatic_gc_cycles_total Count of completed GC cycles generated by the Go runtime.
HELP go_gc_cycles_forced_gc_cycles_total Count of completed GC cycles forced by the application.
HELP go_gc_cycles_total_gc_cycles_total Count of all completed GC cycles.
HELP go_gc_gogc_percent Heap size target percentage configured by the user, otherwise 100. This value is set by the GOGC environment variable, and the runtime/debug.SetGCPercent function.
HELP go_gc_gomemlimit_bytes Go runtime memory limit configured by the user, otherwise math.MaxInt64. This value is set by the GOMEMLIMIT environment variable, and the runtime/debug.SetMemoryLimit function. HELP go_gc_heap_allocs_by_size_bytes Distribution of heap allocations by approximate size. Bucket counts increase monotonically. Note that this does not include tiny objects as defined by /gc/heap/tiny/allocs:objects, only tiny blocks. HELP go_gc_heap_allocs_bytes_total Cumulative sum of memory allocated to the heap by the application. HELP go_gc_heap_allocs_objects_total Cumulative count of heap allocations triggered by the application. Note that this does not include tiny objects as defined by /gc/heap/tiny/allocs:objects, only tiny blocks. HELP go_gc_heap_frees_by_size_bytes Distribution of freed heap allocations by approximate size. Bucket counts increase monotonically. Note that this does not include tiny objects as defined by /gc/heap/tiny/allocs:objects, only tiny blocks. HELP go_gc_heap_frees_bytes_total Cumulative sum of heap memory freed by the garbage collector. HELP go_gc_heap_frees_objects_total Cumulative count of heap allocations whose storage was freed by the garbage collector. Note that this does not include tiny objects as defined by /gc/heap/tiny/allocs:objects, only tiny blocks. HELP go_gc_heap_goal_bytes Heap size target for the end of the GC cycle. HELP go_gc_heap_live_bytes Heap memory occupied by live objects that were marked by the previous GC. HELP go_gc_heap_objects_objects Number of objects, live or unswept, occupying heap memory. HELP go_gc_heap_tiny_allocs_objects_total Count of small allocations that are packed together into blocks. These allocations are counted separately from other allocations because each individual allocation is not tracked by the runtime, only their block. Each block is already accounted for in allocs-by-size and frees-by-size. HELP go_gc_limiter_last_enabled_gc_cycle GC cycle the last time the GC CPU limiter was enabled. This metric is useful for diagnosing the root cause of an out-of-memory error, because the limiter trades memory for CPU time when the GC's CPU time gets too high. This is most likely to occur with use of SetMemoryLimit. The first GC cycle is cycle 1, so a value of 0 indicates that it was never enabled. HELP go_gc_pauses_seconds Deprecated. Prefer the identical /sched/pauses/total/gc:seconds. HELP go_gc_scan_globals_bytes The total amount of global variable space that is scannable. HELP go_gc_scan_heap_bytes The total amount of heap space that is scannable. HELP go_gc_scan_stack_bytes The number of bytes of stack that were scanned last GC cycle. HELP go_gc_scan_total_bytes The total amount space that is scannable. Sum of all metrics in /gc/scan. HELP go_gc_stack_starting_size_bytes The stack size of new goroutines. HELP go_sched_gomaxprocs_threads The current runtime.GOMAXPROCS setting, or the number of operating system threads that can execute user-level Go code simultaneously. HELP go_sched_goroutines_goroutines Count of live goroutines. HELP go_sched_latencies_seconds Distribution of the time goroutines have spent in the scheduler in a runnable state before actually running. Bucket counts increase monotonically. HELP go_sched_pauses_stopping_gc_seconds Distribution of individual GC-related stop-the-world stopping latencies. This is the time it takes from deciding to stop the world until all Ps are stopped. This is a subset of the total GC-related stop-the-world time (/sched/pauses/total/gc:seconds). 
During this time, some threads may be executing. Bucket counts increase monotonically. HELP go_sched_pauses_stopping_other_seconds Distribution of individual non-GC-related stop-the-world stopping latencies. This is the time it takes from deciding to stop the world until all Ps are stopped. This is a subset of the total non-GC-related stop-the-world time (/sched/pauses/total/other:seconds). During this time, some threads may be executing. Bucket counts increase monotonically. HELP go_sched_pauses_total_gc_seconds Distribution of individual GC-related stop-the-world pause latencies. This is the time from deciding to stop the world until the world is started again. Some of this time is spent getting all threads to stop (this is measured directly in /sched/pauses/stopping/gc:seconds), during which some threads may still be running. Bucket counts increase monotonically. HELP go_sched_pauses_total_other_seconds Distribution of individual non-GC-related stop-the-world pause latencies. This is the time from deciding to stop the world until the world is started again. Some of this time is spent getting all threads to stop (measured directly in /sched/pauses/stopping/other:seconds). Bucket counts increase monotonically. ``` Signed-off-by: SuperQ --- cmd/prometheus/main.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index e250a95c8..f2988b2f2 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -42,6 +42,7 @@ import ( "github.com/mwitkow/go-conntrack" "github.com/oklog/run" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version" "github.com/prometheus/common/model" "github.com/prometheus/common/promlog" @@ -252,6 +253,18 @@ func main() { newFlagRetentionDuration model.Duration ) + // Unregister the default GoCollector, and reregister with our defaults. + if prometheus.Unregister(collectors.NewGoCollector()) { + prometheus.MustRegister( + collectors.NewGoCollector( + collectors.WithGoCollectorRuntimeMetrics( + collectors.MetricsGC, + collectors.MetricsScheduler, + ), + ), + ) + } + cfg := flagConfig{ notifier: notifier.Options{ Registerer: prometheus.DefaultRegisterer, From 6683895620ac5d6e20b58ccbff8b20604e55881a Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Wed, 29 May 2024 08:03:33 -0700 Subject: [PATCH 105/112] optimize regex matching for empty label values in posting match (#14075) Also update tests. Signed-off-by: Ben Ye --- tsdb/querier.go | 4 ++-- tsdb/querier_test.go | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tsdb/querier.go b/tsdb/querier.go index efd4daf26..1071c4a71 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -357,8 +357,8 @@ func inversePostingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Ma } res := vals[:0] - // If the inverse match is ="", we just want all the values. - if m.Type == labels.MatchEqual && m.Value == "" { + // If the match before inversion was !="" or !~"", we just want all the values. + if m.Value == "" && (m.Type == labels.MatchRegexp || m.Type == labels.MatchEqual) { res = vals } else { count := 1 diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index bb13531d7..c7e60a0e1 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -2808,6 +2808,13 @@ func TestPostingsForMatchers(t *testing.T) { }, }, // Not regex. 
+ { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "i", "")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, { matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "n", "^1$")}, exp: []labels.Labels{ From 929fbf860ee8b1391fc94952fbd1ec0879824576 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 11 May 2024 16:43:34 +0100 Subject: [PATCH 106/112] [Test] TSDB: let BenchmarkAddExemplar reuse slots Test with different amounts of capacity and exemplars, so that sometimes new exemplars are evicting older exemplars. Signed-off-by: Bryan Boreham --- tsdb/exemplar_test.go | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/tsdb/exemplar_test.go b/tsdb/exemplar_test.go index b2be22170..fac7ef57b 100644 --- a/tsdb/exemplar_test.go +++ b/tsdb/exemplar_test.go @@ -415,27 +415,29 @@ func BenchmarkAddExemplar(b *testing.B) { // before adding. exLabels := labels.FromStrings("trace_id", "89620921") - for _, n := range []int{10000, 100000, 1000000} { - b.Run(strconv.Itoa(n), func(b *testing.B) { - for j := 0; j < b.N; j++ { - b.StopTimer() - exs, err := NewCircularExemplarStorage(int64(n), eMetrics) - require.NoError(b, err) - es := exs.(*CircularExemplarStorage) - var l labels.Labels - b.StartTimer() + for _, capacity := range []int{1000, 10000, 100000} { + for _, n := range []int{10000, 100000, 1000000} { + b.Run(fmt.Sprintf("%d/%d", n, capacity), func(b *testing.B) { + for j := 0; j < b.N; j++ { + b.StopTimer() + exs, err := NewCircularExemplarStorage(int64(capacity), eMetrics) + require.NoError(b, err) + es := exs.(*CircularExemplarStorage) + var l labels.Labels + b.StartTimer() - for i := 0; i < n; i++ { - if i%100 == 0 { - l = labels.FromStrings("service", strconv.Itoa(i)) - } - err = es.AddExemplar(l, exemplar.Exemplar{Value: float64(i), Ts: int64(i), Labels: exLabels}) - if err != nil { - require.NoError(b, err) + for i := 0; i < n; i++ { + if i%100 == 0 { + l = labels.FromStrings("service", strconv.Itoa(i)) + } + err = es.AddExemplar(l, exemplar.Exemplar{Value: float64(i), Ts: int64(i), Labels: exLabels}) + if err != nil { + require.NoError(b, err) + } } } - } - }) + }) + } } } From f0c50b5a66f0ad962e6cbddfc66055b54fecf8e7 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 11 May 2024 17:00:32 +0100 Subject: [PATCH 107/112] [Test] TSDB: BenchmarkResizeExemplar multiple per series One exemplar per series is not a typical workload. Make it the same as `BenchmarkAddExemplar`. Signed-off-by: Bryan Boreham --- tsdb/exemplar_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tsdb/exemplar_test.go b/tsdb/exemplar_test.go index fac7ef57b..7723ec389 100644 --- a/tsdb/exemplar_test.go +++ b/tsdb/exemplar_test.go @@ -482,8 +482,11 @@ func BenchmarkResizeExemplars(b *testing.B) { require.NoError(b, err) es := exs.(*CircularExemplarStorage) + var l labels.Labels for i := 0; i < int(float64(tc.startSize)*float64(1.5)); i++ { - l := labels.FromStrings("service", strconv.Itoa(i)) + if i%100 == 0 { + l = labels.FromStrings("service", strconv.Itoa(i)) + } err = es.AddExemplar(l, exemplar.Exemplar{Value: float64(i), Ts: int64(i)}) if err != nil { From 3eb55818778951f953434700184eabf004f817b2 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 11 May 2024 16:25:00 +0100 Subject: [PATCH 108/112] [ENHANCEMENT] TSDB: Reduce map lookups on exemplar index In many cases we already have a pointer to the entry. 
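To make the intent concrete, here is a minimal stand-alone sketch of the pattern (the `entry` type and names are illustrative, not the actual exemplar-storage types): look the key up once, keep the returned pointer, and mutate through it instead of re-hashing the key on every access.

```go
package main

import "fmt"

type entry struct{ oldest, newest int }

func main() {
	index := map[string]*entry{}
	key := "series-a"

	// One map lookup; every later update goes through the pointer.
	idx, ok := index[key]
	if !ok {
		idx = &entry{oldest: 0}
		index[key] = idx
	}
	idx.newest = 7 // updates the map's entry without a second index[key] lookup

	fmt.Println(index[key].newest) // prints 7
}
```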
Signed-off-by: Bryan Boreham --- tsdb/exemplar.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index 213fff5c2..1c149fa0a 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -371,11 +371,12 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp return err } - _, ok := ce.index[string(seriesLabels)] + idx, ok := ce.index[string(seriesLabels)] if !ok { - ce.index[string(seriesLabels)] = &indexEntry{oldest: ce.nextIndex, seriesLabels: l} + idx = &indexEntry{oldest: ce.nextIndex, seriesLabels: l} + ce.index[string(seriesLabels)] = idx } else { - ce.exemplars[ce.index[string(seriesLabels)].newest].next = ce.nextIndex + ce.exemplars[idx.newest].next = ce.nextIndex } if prev := ce.exemplars[ce.nextIndex]; prev == nil { @@ -383,13 +384,13 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp } else { // There exists an exemplar already on this ce.nextIndex entry, // drop it, to make place for others. - var buf [1024]byte - prevLabels := prev.ref.seriesLabels.Bytes(buf[:]) if prev.next == noExemplar { // Last item for this series, remove index entry. + var buf [1024]byte + prevLabels := prev.ref.seriesLabels.Bytes(buf[:]) delete(ce.index, string(prevLabels)) } else { - ce.index[string(prevLabels)].oldest = prev.next + prev.ref.oldest = prev.next } } @@ -397,8 +398,8 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp // since this is the first exemplar stored for this series. ce.exemplars[ce.nextIndex].next = noExemplar ce.exemplars[ce.nextIndex].exemplar = e - ce.exemplars[ce.nextIndex].ref = ce.index[string(seriesLabels)] - ce.index[string(seriesLabels)].newest = ce.nextIndex + ce.exemplars[ce.nextIndex].ref = idx + idx.newest = ce.nextIndex ce.nextIndex = (ce.nextIndex + 1) % len(ce.exemplars) From c0bb156eca2b6216242d0efdc80627bb0096ea00 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 11 May 2024 16:37:52 +0100 Subject: [PATCH 109/112] [ENHANCEMENT] TSDB: Eliminate pointer when storing exemplars Saves memory and effort. 
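A rough sketch of the layout difference (illustrative type only, not the real buffer): with a slice of values the entries live in one contiguous allocation and an empty slot is detected via a sentinel field, whereas a slice of pointers costs a separate heap allocation plus an indirection per slot.

```go
package main

import "fmt"

type slot struct {
	ref   *string // nil ref marks an empty slot in the value version
	value int
}

func main() {
	name := "series-a"

	// Pointer version: n small heap allocations; a nil element marks empty.
	ptrs := make([]*slot, 4)
	ptrs[1] = &slot{ref: &name, value: 42}

	// Value version: one contiguous allocation; emptiness is ref == nil.
	vals := make([]slot, 4)
	vals[1] = slot{ref: &name, value: 42}

	fmt.Println(ptrs[1].value, vals[1].value, vals[0].ref == nil)
}
```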
Signed-off-by: Bryan Boreham --- tsdb/exemplar.go | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index 1c149fa0a..a8156669c 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -37,7 +37,7 @@ const ( type CircularExemplarStorage struct { lock sync.RWMutex - exemplars []*circularBufferEntry + exemplars []circularBufferEntry nextIndex int metrics *ExemplarMetrics @@ -121,7 +121,7 @@ func NewCircularExemplarStorage(length int64, m *ExemplarMetrics) (ExemplarStora length = 0 } c := &CircularExemplarStorage{ - exemplars: make([]*circularBufferEntry, length), + exemplars: make([]circularBufferEntry, length), index: make(map[string]*indexEntry, length/estimatedExemplarsPerSeries), metrics: m, } @@ -292,7 +292,7 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { oldBuffer := ce.exemplars oldNextIndex := int64(ce.nextIndex) - ce.exemplars = make([]*circularBufferEntry, l) + ce.exemplars = make([]circularBufferEntry, l) ce.index = make(map[string]*indexEntry, l/estimatedExemplarsPerSeries) ce.nextIndex = 0 @@ -313,8 +313,8 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { for i := int64(0); i < count; i++ { idx := (startIndex + i) % int64(len(oldBuffer)) - if entry := oldBuffer[idx]; entry != nil { - ce.migrate(entry) + if oldBuffer[idx].ref != nil { + ce.migrate(&oldBuffer[idx]) migrated++ } } @@ -344,7 +344,7 @@ func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry) { idx.newest = ce.nextIndex entry.next = noExemplar - ce.exemplars[ce.nextIndex] = entry + ce.exemplars[ce.nextIndex] = *entry ce.nextIndex = (ce.nextIndex + 1) % len(ce.exemplars) } @@ -379,9 +379,7 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp ce.exemplars[idx.newest].next = ce.nextIndex } - if prev := ce.exemplars[ce.nextIndex]; prev == nil { - ce.exemplars[ce.nextIndex] = &circularBufferEntry{} - } else { + if prev := &ce.exemplars[ce.nextIndex]; prev.ref != nil { // There exists an exemplar already on this ce.nextIndex entry, // drop it, to make place for others. if prev.next == noExemplar { @@ -417,15 +415,15 @@ func (ce *CircularExemplarStorage) computeMetrics() { return } - if next := ce.exemplars[ce.nextIndex]; next != nil { + if ce.exemplars[ce.nextIndex].ref != nil { ce.metrics.exemplarsInStorage.Set(float64(len(ce.exemplars))) - ce.metrics.lastExemplarsTs.Set(float64(next.exemplar.Ts) / 1000) + ce.metrics.lastExemplarsTs.Set(float64(ce.exemplars[ce.nextIndex].exemplar.Ts) / 1000) return } // We did not yet fill the buffer. ce.metrics.exemplarsInStorage.Set(float64(ce.nextIndex)) - if ce.exemplars[0] != nil { + if ce.exemplars[0].ref != nil { ce.metrics.lastExemplarsTs.Set(float64(ce.exemplars[0].exemplar.Ts) / 1000) } } @@ -439,7 +437,7 @@ func (ce *CircularExemplarStorage) IterateExemplars(f func(seriesLabels labels.L idx := ce.nextIndex l := len(ce.exemplars) for i := 0; i < l; i, idx = i+1, (idx+1)%l { - if ce.exemplars[idx] == nil { + if ce.exemplars[idx].ref == nil { continue } err := f(ce.exemplars[idx].ref.seriesLabels, ce.exemplars[idx].exemplar) From 7d984874471389e8995bcedfeb9f684544c50f30 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 11 May 2024 17:22:56 +0100 Subject: [PATCH 110/112] [ENHANCEMENT] TSDB: let Resize re-use buffer This saves having to zero the buffer every time. 
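The shape of the change, as a self-contained sketch (hypothetical helper, not the actual migrate): the stack buffer is declared once in the caller and re-sliced with buf[:0] on each call, so every iteration appends into the same storage instead of zero-initialising a fresh 1024-byte array.

```go
package main

import "fmt"

// serialize appends name into the caller-provided buffer and returns the
// filled slice; it allocates nothing while the buffer has enough capacity.
func serialize(name string, buf []byte) []byte {
	return append(buf[:0], name...)
}

func main() {
	var buf [1024]byte // zeroed once, then reused by every call below
	for _, s := range []string{"a", "bb", "ccc"} {
		key := serialize(s, buf[:])
		fmt.Println(string(key))
	}
}
```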
Signed-off-by: Bryan Boreham --- tsdb/exemplar.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index a8156669c..898bb2490 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -311,10 +311,11 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { // This way we don't migrate exemplars that would just be overwritten when migrating later exemplars. startIndex := (oldNextIndex - count + int64(len(oldBuffer))) % int64(len(oldBuffer)) + var buf [1024]byte for i := int64(0); i < count; i++ { idx := (startIndex + i) % int64(len(oldBuffer)) if oldBuffer[idx].ref != nil { - ce.migrate(&oldBuffer[idx]) + ce.migrate(&oldBuffer[idx], buf[:]) migrated++ } } @@ -328,9 +329,8 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { // migrate is like AddExemplar but reuses existing structs. Expected to be called in batch and requires // external lock and does not compute metrics. -func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry) { - var buf [1024]byte - seriesLabels := entry.ref.seriesLabels.Bytes(buf[:]) +func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry, buf []byte) { + seriesLabels := entry.ref.seriesLabels.Bytes(buf[:0]) idx, ok := ce.index[string(seriesLabels)] if !ok { From 3ee52abb5342728f84cd722fcd2516bed9d066be Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 11 May 2024 17:32:17 +0100 Subject: [PATCH 111/112] [ENHANCEMENT] TSDB: Save map lookup on validation Goes faster. Signed-off-by: Bryan Boreham --- tsdb/exemplar.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index 898bb2490..7545ab9a6 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -214,12 +214,12 @@ func (ce *CircularExemplarStorage) ValidateExemplar(l labels.Labels, e exemplar. // Optimize by moving the lock to be per series (& benchmark it). ce.lock.RLock() defer ce.lock.RUnlock() - return ce.validateExemplar(seriesLabels, e, false) + return ce.validateExemplar(ce.index[string(seriesLabels)], e, false) } // Not thread safe. The appended parameters tells us whether this is an external validation, or internal // as a result of an AddExemplar call, in which case we should update any relevant metrics. -func (ce *CircularExemplarStorage) validateExemplar(key []byte, e exemplar.Exemplar, appended bool) error { +func (ce *CircularExemplarStorage) validateExemplar(idx *indexEntry, e exemplar.Exemplar, appended bool) error { if len(ce.exemplars) == 0 { return storage.ErrExemplarsDisabled } @@ -239,8 +239,7 @@ func (ce *CircularExemplarStorage) validateExemplar(key []byte, e exemplar.Exemp return err } - idx, ok := ce.index[string(key)] - if !ok { + if idx == nil { return nil } @@ -362,7 +361,8 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp ce.lock.Lock() defer ce.lock.Unlock() - err := ce.validateExemplar(seriesLabels, e, true) + idx, ok := ce.index[string(seriesLabels)] + err := ce.validateExemplar(idx, e, true) if err != nil { if errors.Is(err, storage.ErrDuplicateExemplar) { // Duplicate exemplar, noop. 
@@ -371,7 +371,6 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp return err } - idx, ok := ce.index[string(seriesLabels)] if !ok { idx = &indexEntry{oldest: ce.nextIndex, seriesLabels: l} ce.index[string(seriesLabels)] = idx From 37b408c6cd5284dd250549819d76b52afceb6d73 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Thu, 30 May 2024 11:49:50 +0100 Subject: [PATCH 112/112] Feature: Allow configuration of a rule evaluation delay (#14061) * [PATCH] Allow having evaluation delay for rule groups Signed-off-by: Ganesh Vernekar * [PATCH] Fix lint Signed-off-by: Ganesh Vernekar * [PATCH] Move the option to ManagerOptions Signed-off-by: Ganesh Vernekar * [PATCH] Include evaluation_delay in the group config Signed-off-by: Ganesh Vernekar * Fix comments Signed-off-by: gotjosh * Add a server configuration option. Signed-off-by: gotjosh * Appease the linter #1 Signed-off-by: gotjosh * Add the new server flag documentation Signed-off-by: gotjosh * Improve documentation of the new flag and configuration Signed-off-by: gotjosh * Use named parameters for clarity on the `Rule` interface Signed-off-by: gotjosh * Add `initial` to the flag help Signed-off-by: gotjosh * Change the CHANGELOG area from `ruler` to `rules` Signed-off-by: gotjosh * Rename evaluation_delay to `rule_query_offset`/`query_offset` and make it a global configuration option. Signed-off-by: gotjosh * more docs Signed-off-by: gotjosh * Improve wording on CHANGELOG Signed-off-by: gotjosh * Add `RuleQueryOffset` to the default config in tests in case it changes Signed-off-by: gotjosh * Update docs/configuration/recording_rules.md Co-authored-by: Julius Volz Signed-off-by: gotjosh * Rename `RuleQueryOffset` to `QueryOffset` when in the group context. Signed-off-by: gotjosh * Improve docstring and documentation on the `rule_query_offset` Signed-off-by: gotjosh --------- Signed-off-by: Ganesh Vernekar Signed-off-by: gotjosh Co-authored-by: Ganesh Vernekar Co-authored-by: Julius Volz --- CHANGELOG.md | 1 + cmd/prometheus/main.go | 3 + config/config.go | 4 + docs/configuration/configuration.md | 4 + docs/configuration/recording_rules.md | 6 + model/rulefmt/rulefmt.go | 9 +- rules/alerting.go | 9 +- rules/alerting_test.go | 32 +- rules/group.go | 24 +- rules/manager.go | 2 + rules/manager_test.go | 738 ++++++++++++++------ rules/origin_test.go | 2 +- rules/recording.go | 5 +- rules/recording_test.go | 10 +- rules/rule.go | 2 +- 15 files changed, 471 insertions(+), 380 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 079dd7595..941f09da9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## unreleased * [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980 +* [FEATURE] Rules: Add new option `query_offset` for each rule group via rule group configuration file and `rule_query_offset` as part of the global configuration to have more resilience for remote write delays. #14061 * [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974 * [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991 * [ENHANCEMENT] TSDB: Optimize querying with regexp matchers.
#13620 diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index f2988b2f2..0532bc380 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -785,6 +785,9 @@ func main() { ResendDelay: time.Duration(cfg.resendDelay), MaxConcurrentEvals: cfg.maxConcurrentEvals, ConcurrentEvalsEnabled: cfg.enableConcurrentRuleEval, + DefaultRuleQueryOffset: func() time.Duration { + return time.Duration(cfgFile.GlobalConfig.RuleQueryOffset) + }, }) } diff --git a/config/config.go b/config/config.go index 1cfd58864..463dbc357 100644 --- a/config/config.go +++ b/config/config.go @@ -145,6 +145,7 @@ var ( ScrapeInterval: model.Duration(1 * time.Minute), ScrapeTimeout: model.Duration(10 * time.Second), EvaluationInterval: model.Duration(1 * time.Minute), + RuleQueryOffset: model.Duration(0 * time.Minute), // When native histogram feature flag is enabled, ScrapeProtocols default // changes to DefaultNativeHistogramScrapeProtocols. ScrapeProtocols: DefaultScrapeProtocols, @@ -397,6 +398,8 @@ type GlobalConfig struct { ScrapeProtocols []ScrapeProtocol `yaml:"scrape_protocols,omitempty"` // How frequently to evaluate rules by default. EvaluationInterval model.Duration `yaml:"evaluation_interval,omitempty"` + // Offset the rule evaluation timestamp by the specified duration into the past to ensure the underlying metrics have been received. + RuleQueryOffset model.Duration `yaml:"rule_query_offset"` // File to which PromQL queries are logged. QueryLogFile string `yaml:"query_log_file,omitempty"` // The labels to add to any timeseries that this Prometheus instance scrapes. @@ -556,6 +559,7 @@ func (c *GlobalConfig) isZero() bool { c.ScrapeInterval == 0 && c.ScrapeTimeout == 0 && c.EvaluationInterval == 0 && + c.RuleQueryOffset == 0 && c.QueryLogFile == "" && c.ScrapeProtocols == nil } diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index dedd7a0f7..a8fc9c626 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -70,6 +70,10 @@ global: # How frequently to evaluate rules. [ evaluation_interval: | default = 1m ] + + # Offset the rule evaluation timestamp by the specified duration into the past to ensure the underlying metrics have been received. + # Metric availability delays are more likely to occur when Prometheus is running as a remote write target, but can also occur when there are anomalies with scraping. + [ rule_query_offset: | default = 0s ] # The labels to add to any time series or alerts when communicating with # external systems (federation, remote storage, Alertmanager). diff --git a/docs/configuration/recording_rules.md b/docs/configuration/recording_rules.md index 48ab951f9..9aa226bbc 100644 --- a/docs/configuration/recording_rules.md +++ b/docs/configuration/recording_rules.md @@ -86,6 +86,9 @@ name: # rule can produce. 0 is no limit. [ limit: | default = 0 ] +# Offset the rule evaluation timestamp of this particular group by the specified duration into the past. +[ query_offset: | default = global.rule_query_offset ] + rules: [ - ... ] ``` @@ -148,6 +151,9 @@ the rule, active, pending, or inactive, are cleared as well. The event will be recorded as an error in the evaluation, and as such no stale markers are written. +# Rule query offset +This is useful to ensure the underlying metrics have been received and stored in Prometheus.
Metric availability delays are more likely to occur when Prometheus is running as a remote write target due to the nature of distributed systems, but can also occur when there are anomalies with scraping and/or short evaluation intervals. + # Failed rule evaluations due to slow evaluation If a rule group hasn't finished evaluating before its next evaluation is supposed to start (as defined by the `evaluation_interval`), the next evaluation will be skipped. Subsequent evaluations of the rule group will continue to be skipped until the initial evaluation either completes or times out. When this happens, there will be a gap in the metric produced by the recording rule. The `rule_group_iterations_missed_total` metric will be incremented for each missed iteration of the rule group. diff --git a/model/rulefmt/rulefmt.go b/model/rulefmt/rulefmt.go index 4ed1619d6..bfb85ce74 100644 --- a/model/rulefmt/rulefmt.go +++ b/model/rulefmt/rulefmt.go @@ -136,10 +136,11 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { // RuleGroup is a list of sequentially evaluated recording and alerting rules. type RuleGroup struct { - Name string `yaml:"name"` - Interval model.Duration `yaml:"interval,omitempty"` - Limit int `yaml:"limit,omitempty"` - Rules []RuleNode `yaml:"rules"` + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + QueryOffset *model.Duration `yaml:"query_offset,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []RuleNode `yaml:"rules"` } // Rule describes an alerting or recording rule. diff --git a/rules/alerting.go b/rules/alerting.go index 2d2b19efe..2dc0917dc 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -338,10 +338,9 @@ const resolvedRetention = 15 * time.Minute // Eval evaluates the rule expression and then creates pending alerts and fires // or removes previously pending alerts accordingly.
-func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, externalURL *url.URL, limit int) (promql.Vector, error) { +func (r *AlertingRule) Eval(ctx context.Context, queryOffset time.Duration, ts time.Time, query QueryFunc, externalURL *url.URL, limit int) (promql.Vector, error) { ctx = NewOriginContext(ctx, NewRuleDetail(r)) - - res, err := query(ctx, r.vector.String(), ts) + res, err := query(ctx, r.vector.String(), ts.Add(-queryOffset)) if err != nil { return nil, err } @@ -484,8 +483,8 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, } if r.restored.Load() { - vec = append(vec, r.sample(a, ts)) - vec = append(vec, r.forStateSample(a, ts, float64(a.ActiveAt.Unix()))) + vec = append(vec, r.sample(a, ts.Add(-queryOffset))) + vec = append(vec, r.forStateSample(a, ts.Add(-queryOffset), float64(a.ActiveAt.Unix()))) } } diff --git a/rules/alerting_test.go b/rules/alerting_test.go index a9315b47e..5ebd049f6 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -123,7 +123,7 @@ func TestAlertingRuleTemplateWithHistogram(t *testing.T) { ) evalTime := time.Now() - res, err := rule.Eval(context.TODO(), evalTime, q, nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, q, nil, 0) require.NoError(t, err) require.Len(t, res, 2) @@ -230,7 +230,7 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) { t.Logf("case %d", i) evalTime := baseTime.Add(time.Duration(i) * time.Minute) result[0].T = timestamp.FromTime(evalTime) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. @@ -247,7 +247,7 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) { testutil.RequireEqual(t, result, filteredRes) } evalTime := baseTime.Add(time.Duration(len(results)) * time.Minute) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) require.Empty(t, res) } @@ -315,7 +315,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. res, err := ruleWithoutExternalLabels.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -329,7 +329,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { } res, err = ruleWithExternalLabels.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -408,7 +408,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. 
res, err := ruleWithoutExternalURL.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -422,7 +422,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { } res, err = ruleWithExternalURL.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -477,7 +477,7 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. res, err := rule.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -544,7 +544,7 @@ instance: {{ $v.Labels.instance }}, value: {{ printf "%.0f" $v.Value }}; close(getDoneCh) }() _, err = ruleWithQueryInTemplate.Eval( - context.TODO(), evalTime, slowQueryFunc, nil, 0, + context.TODO(), 0, evalTime, slowQueryFunc, nil, 0, ) require.NoError(t, err) } @@ -596,7 +596,7 @@ func TestAlertingRuleDuplicate(t *testing.T) { "", true, log.NewNopLogger(), ) - _, err := rule.Eval(ctx, now, EngineQueryFunc(engine, storage), nil, 0) + _, err := rule.Eval(ctx, 0, now, EngineQueryFunc(engine, storage), nil, 0) require.Error(t, err) require.EqualError(t, err, "vector contains metrics with the same labelset after applying alert labels") } @@ -644,7 +644,7 @@ func TestAlertingRuleLimit(t *testing.T) { evalTime := time.Unix(0, 0) for _, test := range tests { - switch _, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); { + switch _, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); { case err != nil: require.EqualError(t, err, test.err) case test.err != "": @@ -871,7 +871,7 @@ func TestKeepFiringFor(t *testing.T) { t.Logf("case %d", i) evalTime := baseTime.Add(time.Duration(i) * time.Minute) result[0].T = timestamp.FromTime(evalTime) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. 
@@ -888,7 +888,7 @@ func TestKeepFiringFor(t *testing.T) { testutil.RequireEqual(t, result, filteredRes) } evalTime := baseTime.Add(time.Duration(len(results)) * time.Minute) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) require.Empty(t, res) } @@ -925,7 +925,7 @@ func TestPendingAndKeepFiringFor(t *testing.T) { baseTime := time.Unix(0, 0) result.T = timestamp.FromTime(baseTime) - res, err := rule.Eval(context.TODO(), baseTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, baseTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) require.Len(t, res, 2) @@ -940,7 +940,7 @@ func TestPendingAndKeepFiringFor(t *testing.T) { } evalTime := baseTime.Add(time.Minute) - res, err = rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err = rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) require.Empty(t, res) } @@ -974,7 +974,7 @@ func TestAlertingEvalWithOrigin(t *testing.T) { true, log.NewNopLogger(), ) - _, err = rule.Eval(ctx, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) { + _, err = rule.Eval(ctx, 0, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) { detail = FromOriginContext(ctx) return nil, nil }, nil, 0) diff --git a/rules/group.go b/rules/group.go index 1f4757de3..9ae89789d 100644 --- a/rules/group.go +++ b/rules/group.go @@ -47,6 +47,7 @@ type Group struct { name string file string interval time.Duration + queryOffset *time.Duration limit int rules []Rule seriesInPreviousEval []map[string]labels.Labels // One per Rule. @@ -90,6 +91,7 @@ type GroupOptions struct { Rules []Rule ShouldRestore bool Opts *ManagerOptions + QueryOffset *time.Duration done chan struct{} EvalIterationFunc GroupEvalIterationFunc } @@ -126,6 +128,7 @@ func NewGroup(o GroupOptions) *Group { name: o.Name, file: o.File, interval: o.Interval, + queryOffset: o.QueryOffset, limit: o.Limit, rules: o.Rules, shouldRestore: o.ShouldRestore, @@ -443,6 +446,8 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { wg sync.WaitGroup ) + ruleQueryOffset := g.QueryOffset() + for i, rule := range g.rules { select { case <-g.done: @@ -473,7 +478,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() - vector, err := rule.Eval(ctx, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit()) + vector, err := rule.Eval(ctx, ruleQueryOffset, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit()) if err != nil { rule.SetHealth(HealthBad) rule.SetLastError(err) @@ -562,7 +567,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { for metric, lset := range g.seriesInPreviousEval[i] { if _, ok := seriesReturned[metric]; !ok { // Series no longer exposed, mark it stale. 
- _, err = app.Append(0, lset, timestamp.FromTime(ts), math.Float64frombits(value.StaleNaN)) + _, err = app.Append(0, lset, timestamp.FromTime(ts.Add(-ruleQueryOffset)), math.Float64frombits(value.StaleNaN)) unwrappedErr := errors.Unwrap(err) if unwrappedErr == nil { unwrappedErr = err @@ -601,14 +606,27 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { g.cleanupStaleSeries(ctx, ts) } +func (g *Group) QueryOffset() time.Duration { + if g.queryOffset != nil { + return *g.queryOffset + } + + if g.opts.DefaultRuleQueryOffset != nil { + return g.opts.DefaultRuleQueryOffset() + } + + return time.Duration(0) +} + func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) { if len(g.staleSeries) == 0 { return } app := g.opts.Appendable.Appender(ctx) + queryOffset := g.QueryOffset() for _, s := range g.staleSeries { // Rule that produced series no longer configured, mark it stale. - _, err := app.Append(0, s, timestamp.FromTime(ts), math.Float64frombits(value.StaleNaN)) + _, err := app.Append(0, s, timestamp.FromTime(ts.Add(-queryOffset)), math.Float64frombits(value.StaleNaN)) unwrappedErr := errors.Unwrap(err) if unwrappedErr == nil { unwrappedErr = err diff --git a/rules/manager.go b/rules/manager.go index 165dca144..063189e0a 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -116,6 +116,7 @@ type ManagerOptions struct { ForGracePeriod time.Duration ResendDelay time.Duration GroupLoader GroupLoader + DefaultRuleQueryOffset func() time.Duration MaxConcurrentEvals int64 ConcurrentEvalsEnabled bool RuleConcurrencyController RuleConcurrencyController @@ -336,6 +337,7 @@ func (m *Manager) LoadGroups( Rules: rules, ShouldRestore: shouldRestore, Opts: m.opts, + QueryOffset: (*time.Duration)(rg.QueryOffset), done: m.done, EvalIterationFunc: groupEvalIterationFunc, }) diff --git a/rules/manager_test.go b/rules/manager_test.go index 2f7343ebb..11d1282bd 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -16,8 +16,10 @@ package rules import ( "context" "fmt" + "io/fs" "math" "os" + "path" "sort" "strconv" "sync" @@ -162,7 +164,7 @@ func TestAlertingRule(t *testing.T) { evalTime := baseTime.Add(test.time) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. 
@@ -192,152 +194,156 @@ func TestAlertingRule(t *testing.T) { } func TestForStateAddSamples(t *testing.T) { - storage := promqltest.LoadedStorage(t, ` + for _, queryOffset := range []time.Duration{0, time.Minute} { + t.Run(fmt.Sprintf("queryOffset %s", queryOffset.String()), func(t *testing.T) { + storage := promqltest.LoadedStorage(t, ` load 5m http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140 `) - t.Cleanup(func() { storage.Close() }) + t.Cleanup(func() { storage.Close() }) - expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) - require.NoError(t, err) + expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) + require.NoError(t, err) - rule := NewAlertingRule( - "HTTPRequestRateLow", - expr, - time.Minute, - 0, - labels.FromStrings("severity", "{{\"c\"}}ritical"), - labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, - ) - result := promql.Vector{ - promql.Sample{ - Metric: labels.FromStrings( - "__name__", "ALERTS_FOR_STATE", - "alertname", "HTTPRequestRateLow", - "group", "canary", - "instance", "0", - "job", "app-server", - "severity", "critical", - ), - F: 1, - }, - promql.Sample{ - Metric: labels.FromStrings( - "__name__", "ALERTS_FOR_STATE", - "alertname", "HTTPRequestRateLow", - "group", "canary", - "instance", "1", - "job", "app-server", - "severity", "critical", - ), - F: 1, - }, - promql.Sample{ - Metric: labels.FromStrings( - "__name__", "ALERTS_FOR_STATE", - "alertname", "HTTPRequestRateLow", - "group", "canary", - "instance", "0", - "job", "app-server", - "severity", "critical", - ), - F: 1, - }, - promql.Sample{ - Metric: labels.FromStrings( - "__name__", "ALERTS_FOR_STATE", - "alertname", "HTTPRequestRateLow", - "group", "canary", - "instance", "1", - "job", "app-server", - "severity", "critical", - ), - F: 1, - }, - } - - baseTime := time.Unix(0, 0) - - tests := []struct { - time time.Duration - result promql.Vector - persistThisTime bool // If true, it means this 'time' is persisted for 'for'. - }{ - { - time: 0, - result: append(promql.Vector{}, result[:2]...), - persistThisTime: true, - }, - { - time: 5 * time.Minute, - result: append(promql.Vector{}, result[2:]...), - }, - { - time: 10 * time.Minute, - result: append(promql.Vector{}, result[2:3]...), - }, - { - time: 15 * time.Minute, - result: nil, - }, - { - time: 20 * time.Minute, - result: nil, - }, - { - time: 25 * time.Minute, - result: append(promql.Vector{}, result[:1]...), - persistThisTime: true, - }, - { - time: 30 * time.Minute, - result: append(promql.Vector{}, result[2:3]...), - }, - } - - var forState float64 - for i, test := range tests { - t.Logf("case %d", i) - evalTime := baseTime.Add(test.time) - - if test.persistThisTime { - forState = float64(evalTime.Unix()) - } - if test.result == nil { - forState = float64(value.StaleNaN) - } - - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) - require.NoError(t, err) - - var filteredRes promql.Vector // After removing 'ALERTS' samples. - for _, smpl := range res { - smplName := smpl.Metric.Get("__name__") - if smplName == "ALERTS_FOR_STATE" { - filteredRes = append(filteredRes, smpl) - } else { - // If not 'ALERTS_FOR_STATE', it has to be 'ALERTS'. 
- require.Equal(t, "ALERTS", smplName) + rule := NewAlertingRule( + "HTTPRequestRateLow", + expr, + time.Minute, + 0, + labels.FromStrings("severity", "{{\"c\"}}ritical"), + labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, + ) + result := promql.Vector{ + promql.Sample{ + Metric: labels.FromStrings( + "__name__", "ALERTS_FOR_STATE", + "alertname", "HTTPRequestRateLow", + "group", "canary", + "instance", "0", + "job", "app-server", + "severity", "critical", + ), + F: 1, + }, + promql.Sample{ + Metric: labels.FromStrings( + "__name__", "ALERTS_FOR_STATE", + "alertname", "HTTPRequestRateLow", + "group", "canary", + "instance", "1", + "job", "app-server", + "severity", "critical", + ), + F: 1, + }, + promql.Sample{ + Metric: labels.FromStrings( + "__name__", "ALERTS_FOR_STATE", + "alertname", "HTTPRequestRateLow", + "group", "canary", + "instance", "0", + "job", "app-server", + "severity", "critical", + ), + F: 1, + }, + promql.Sample{ + Metric: labels.FromStrings( + "__name__", "ALERTS_FOR_STATE", + "alertname", "HTTPRequestRateLow", + "group", "canary", + "instance", "1", + "job", "app-server", + "severity", "critical", + ), + F: 1, + }, } - } - for i := range test.result { - test.result[i].T = timestamp.FromTime(evalTime) - // Updating the expected 'for' state. - if test.result[i].F >= 0 { - test.result[i].F = forState - } - } - require.Equal(t, len(test.result), len(filteredRes), "%d. Number of samples in expected and actual output don't match (%d vs. %d)", i, len(test.result), len(res)) - sort.Slice(filteredRes, func(i, j int) bool { - return labels.Compare(filteredRes[i].Metric, filteredRes[j].Metric) < 0 + baseTime := time.Unix(0, 0) + + tests := []struct { + time time.Duration + result promql.Vector + persistThisTime bool // If true, it means this 'time' is persisted for 'for'. + }{ + { + time: 0, + result: append(promql.Vector{}, result[:2]...), + persistThisTime: true, + }, + { + time: 5 * time.Minute, + result: append(promql.Vector{}, result[2:]...), + }, + { + time: 10 * time.Minute, + result: append(promql.Vector{}, result[2:3]...), + }, + { + time: 15 * time.Minute, + result: nil, + }, + { + time: 20 * time.Minute, + result: nil, + }, + { + time: 25 * time.Minute, + result: append(promql.Vector{}, result[:1]...), + persistThisTime: true, + }, + { + time: 30 * time.Minute, + result: append(promql.Vector{}, result[2:3]...), + }, + } + + var forState float64 + for i, test := range tests { + t.Logf("case %d", i) + evalTime := baseTime.Add(test.time).Add(queryOffset) + + if test.persistThisTime { + forState = float64(evalTime.Unix()) + } + if test.result == nil { + forState = float64(value.StaleNaN) + } + + res, err := rule.Eval(context.TODO(), queryOffset, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + require.NoError(t, err) + + var filteredRes promql.Vector // After removing 'ALERTS' samples. + for _, smpl := range res { + smplName := smpl.Metric.Get("__name__") + if smplName == "ALERTS_FOR_STATE" { + filteredRes = append(filteredRes, smpl) + } else { + // If not 'ALERTS_FOR_STATE', it has to be 'ALERTS'. + require.Equal(t, "ALERTS", smplName) + } + } + for i := range test.result { + test.result[i].T = timestamp.FromTime(evalTime.Add(-queryOffset)) + // Updating the expected 'for' state. + if test.result[i].F >= 0 { + test.result[i].F = forState + } + } + require.Equal(t, len(test.result), len(filteredRes), "%d. Number of samples in expected and actual output don't match (%d vs. 
%d)", i, len(test.result), len(res)) + + sort.Slice(filteredRes, func(i, j int) bool { + return labels.Compare(filteredRes[i].Metric, filteredRes[j].Metric) < 0 + }) + prom_testutil.RequireEqual(t, test.result, filteredRes) + + for _, aa := range rule.ActiveAlerts() { + require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) + } + } }) - prom_testutil.RequireEqual(t, test.result, filteredRes) - - for _, aa := range rule.ActiveAlerts() { - require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) - } } } @@ -349,243 +355,251 @@ func sortAlerts(items []*Alert) { } func TestForStateRestore(t *testing.T) { - storage := promqltest.LoadedStorage(t, ` + for _, queryOffset := range []time.Duration{0, time.Minute} { + t.Run(fmt.Sprintf("queryOffset %s", queryOffset.String()), func(t *testing.T) { + storage := promqltest.LoadedStorage(t, ` load 5m http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 50 0 0 25 0 0 40 0 120 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 125 90 60 0 0 25 0 0 40 0 130 `) - t.Cleanup(func() { storage.Close() }) + t.Cleanup(func() { storage.Close() }) - expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) - require.NoError(t, err) + expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) + require.NoError(t, err) - opts := &ManagerOptions{ - QueryFunc: EngineQueryFunc(testEngine, storage), - Appendable: storage, - Queryable: storage, - Context: context.Background(), - Logger: log.NewNopLogger(), - NotifyFunc: func(ctx context.Context, expr string, alerts ...*Alert) {}, - OutageTolerance: 30 * time.Minute, - ForGracePeriod: 10 * time.Minute, - } - - alertForDuration := 25 * time.Minute - // Initial run before prometheus goes down. - rule := NewAlertingRule( - "HTTPRequestRateLow", - expr, - alertForDuration, - 0, - labels.FromStrings("severity", "critical"), - labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, - ) - - group := NewGroup(GroupOptions{ - Name: "default", - Interval: time.Second, - Rules: []Rule{rule}, - ShouldRestore: true, - Opts: opts, - }) - groups := make(map[string]*Group) - groups["default;"] = group - - initialRuns := []time.Duration{0, 5 * time.Minute} - - baseTime := time.Unix(0, 0) - for _, duration := range initialRuns { - evalTime := baseTime.Add(duration) - group.Eval(context.TODO(), evalTime) - } - - // Prometheus goes down here. We create new rules and groups. 
- type testInput struct { - name string - restoreDuration time.Duration - expectedAlerts []*Alert - - num int - noRestore bool - gracePeriod bool - downDuration time.Duration - before func() - } - - tests := []testInput{ - { - name: "normal restore (alerts were not firing)", - restoreDuration: 15 * time.Minute, - expectedAlerts: rule.ActiveAlerts(), - downDuration: 10 * time.Minute, - }, - { - name: "outage tolerance", - restoreDuration: 40 * time.Minute, - noRestore: true, - num: 2, - }, - { - name: "no active alerts", - restoreDuration: 50 * time.Minute, - expectedAlerts: []*Alert{}, - }, - { - name: "test the grace period", - restoreDuration: 25 * time.Minute, - expectedAlerts: []*Alert{}, - gracePeriod: true, - before: func() { - for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { - evalTime := baseTime.Add(duration) - group.Eval(context.TODO(), evalTime) - } - }, - num: 2, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if tt.before != nil { - tt.before() + opts := &ManagerOptions{ + QueryFunc: EngineQueryFunc(testEngine, storage), + Appendable: storage, + Queryable: storage, + Context: context.Background(), + Logger: log.NewNopLogger(), + NotifyFunc: func(ctx context.Context, expr string, alerts ...*Alert) {}, + OutageTolerance: 30 * time.Minute, + ForGracePeriod: 10 * time.Minute, } - newRule := NewAlertingRule( + alertForDuration := 25 * time.Minute + // Initial run before prometheus goes down. + rule := NewAlertingRule( "HTTPRequestRateLow", expr, alertForDuration, 0, labels.FromStrings("severity", "critical"), - labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil, + labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, ) - newGroup := NewGroup(GroupOptions{ + + group := NewGroup(GroupOptions{ Name: "default", Interval: time.Second, - Rules: []Rule{newRule}, + Rules: []Rule{rule}, ShouldRestore: true, Opts: opts, }) + groups := make(map[string]*Group) + groups["default;"] = group - newGroups := make(map[string]*Group) - newGroups["default;"] = newGroup + initialRuns := []time.Duration{0, 5 * time.Minute} - restoreTime := baseTime.Add(tt.restoreDuration) - // First eval before restoration. - newGroup.Eval(context.TODO(), restoreTime) - // Restore happens here. - newGroup.RestoreForState(restoreTime) - - got := newRule.ActiveAlerts() - for _, aa := range got { - require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) + baseTime := time.Unix(0, 0) + for _, duration := range initialRuns { + evalTime := baseTime.Add(duration) + group.Eval(context.TODO(), evalTime) } - sort.Slice(got, func(i, j int) bool { - return labels.Compare(got[i].Labels, got[j].Labels) < 0 - }) - // In all cases, we expect the restoration process to have completed. - require.Truef(t, newRule.Restored(), "expected the rule restoration process to have completed") + // Prometheus goes down here. We create new rules and groups. + type testInput struct { + name string + restoreDuration time.Duration + expectedAlerts []*Alert - // Checking if we have restored it correctly. 
- switch { - case tt.noRestore: - require.Len(t, got, tt.num) - for _, e := range got { - require.Equal(t, e.ActiveAt, restoreTime) - } - case tt.gracePeriod: + num int + noRestore bool + gracePeriod bool + downDuration time.Duration + before func() + } - require.Len(t, got, tt.num) - for _, e := range got { - require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) - } - default: - exp := tt.expectedAlerts - require.Equal(t, len(exp), len(got)) - sortAlerts(exp) - sortAlerts(got) - for i, e := range exp { - require.Equal(t, e.Labels, got[i].Labels) + tests := []testInput{ + { + name: "normal restore (alerts were not firing)", + restoreDuration: 15 * time.Minute, + expectedAlerts: rule.ActiveAlerts(), + downDuration: 10 * time.Minute, + }, + { + name: "outage tolerance", + restoreDuration: 40 * time.Minute, + noRestore: true, + num: 2, + }, + { + name: "no active alerts", + restoreDuration: 50 * time.Minute, + expectedAlerts: []*Alert{}, + }, + { + name: "test the grace period", + restoreDuration: 25 * time.Minute, + expectedAlerts: []*Alert{}, + gracePeriod: true, + before: func() { + for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { + evalTime := baseTime.Add(duration) + group.Eval(context.TODO(), evalTime) + } + }, + num: 2, + }, + } - // Difference in time should be within 1e6 ns, i.e. 1ms - // (due to conversion between ns & ms, float64 & int64). - activeAtDiff := float64(e.ActiveAt.Unix() + int64(tt.downDuration/time.Second) - got[i].ActiveAt.Unix()) - require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong") - } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.before != nil { + tt.before() + } + + newRule := NewAlertingRule( + "HTTPRequestRateLow", + expr, + alertForDuration, + 0, + labels.FromStrings("severity", "critical"), + labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil, + ) + newGroup := NewGroup(GroupOptions{ + Name: "default", + Interval: time.Second, + Rules: []Rule{newRule}, + ShouldRestore: true, + Opts: opts, + QueryOffset: &queryOffset, + }) + + newGroups := make(map[string]*Group) + newGroups["default;"] = newGroup + + restoreTime := baseTime.Add(tt.restoreDuration).Add(queryOffset) + // First eval before restoration. + newGroup.Eval(context.TODO(), restoreTime) + // Restore happens here. + newGroup.RestoreForState(restoreTime) + + got := newRule.ActiveAlerts() + for _, aa := range got { + require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) + } + sort.Slice(got, func(i, j int) bool { + return labels.Compare(got[i].Labels, got[j].Labels) < 0 + }) + + // In all cases, we expect the restoration process to have completed. + require.Truef(t, newRule.Restored(), "expected the rule restoration process to have completed") + + // Checking if we have restored it correctly. + switch { + case tt.noRestore: + require.Len(t, got, tt.num) + for _, e := range got { + require.Equal(t, e.ActiveAt, restoreTime) + } + case tt.gracePeriod: + + require.Len(t, got, tt.num) + for _, e := range got { + require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) + } + default: + exp := tt.expectedAlerts + require.Equal(t, len(exp), len(got)) + sortAlerts(exp) + sortAlerts(got) + for i, e := range exp { + require.Equal(t, e.Labels, got[i].Labels) + + // Difference in time should be within 1e6 ns, i.e. 
1ms + // (due to conversion between ns & ms, float64 & int64). + activeAtDiff := queryOffset.Seconds() + float64(e.ActiveAt.Unix()+int64(tt.downDuration/time.Second)-got[i].ActiveAt.Unix()) + require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong") + } + } + }) } }) } } func TestStaleness(t *testing.T) { - st := teststorage.New(t) - defer st.Close() - engineOpts := promql.EngineOpts{ - Logger: nil, - Reg: nil, - MaxSamples: 10, - Timeout: 10 * time.Second, + for _, queryOffset := range []time.Duration{0, time.Minute} { + st := teststorage.New(t) + defer st.Close() + engineOpts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 10, + Timeout: 10 * time.Second, + } + engine := promql.NewEngine(engineOpts) + opts := &ManagerOptions{ + QueryFunc: EngineQueryFunc(engine, st), + Appendable: st, + Queryable: st, + Context: context.Background(), + Logger: log.NewNopLogger(), + } + + expr, err := parser.ParseExpr("a + 1") + require.NoError(t, err) + rule := NewRecordingRule("a_plus_one", expr, labels.Labels{}) + group := NewGroup(GroupOptions{ + Name: "default", + Interval: time.Second, + Rules: []Rule{rule}, + ShouldRestore: true, + Opts: opts, + QueryOffset: &queryOffset, + }) + + // A time series that has two samples and then goes stale. + app := st.Appender(context.Background()) + app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 0, 1) + app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 1000, 2) + app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 2000, math.Float64frombits(value.StaleNaN)) + + err = app.Commit() + require.NoError(t, err) + + ctx := context.Background() + + // Execute 3 times, 1 second apart. + group.Eval(ctx, time.Unix(0, 0).Add(queryOffset)) + group.Eval(ctx, time.Unix(1, 0).Add(queryOffset)) + group.Eval(ctx, time.Unix(2, 0).Add(queryOffset)) + + querier, err := st.Querier(0, 2000) + require.NoError(t, err) + defer querier.Close() + + matcher, err := labels.NewMatcher(labels.MatchEqual, model.MetricNameLabel, "a_plus_one") + require.NoError(t, err) + + set := querier.Select(ctx, false, nil, matcher) + samples, err := readSeriesSet(set) + require.NoError(t, err) + + metric := labels.FromStrings(model.MetricNameLabel, "a_plus_one").String() + metricSample, ok := samples[metric] + + require.True(t, ok, "Series %s not returned.", metric) + require.True(t, value.IsStaleNaN(metricSample[2].F), "Appended second sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(metricSample[2].F)) + metricSample[2].F = 42 // require.Equal cannot handle NaN. + + want := map[string][]promql.FPoint{ + metric: {{T: 0, F: 2}, {T: 1000, F: 3}, {T: 2000, F: 42}}, + } + + require.Equal(t, want, samples) } - engine := promql.NewEngine(engineOpts) - opts := &ManagerOptions{ - QueryFunc: EngineQueryFunc(engine, st), - Appendable: st, - Queryable: st, - Context: context.Background(), - Logger: log.NewNopLogger(), - } - - expr, err := parser.ParseExpr("a + 1") - require.NoError(t, err) - rule := NewRecordingRule("a_plus_one", expr, labels.Labels{}) - group := NewGroup(GroupOptions{ - Name: "default", - Interval: time.Second, - Rules: []Rule{rule}, - ShouldRestore: true, - Opts: opts, - }) - - // A time series that has two samples and then goes stale. 
-	app := st.Appender(context.Background())
-	app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 0, 1)
-	app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 1000, 2)
-	app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 2000, math.Float64frombits(value.StaleNaN))
-
-	err = app.Commit()
-	require.NoError(t, err)
-
-	ctx := context.Background()
-
-	// Execute 3 times, 1 second apart.
-	group.Eval(ctx, time.Unix(0, 0))
-	group.Eval(ctx, time.Unix(1, 0))
-	group.Eval(ctx, time.Unix(2, 0))
-
-	querier, err := st.Querier(0, 2000)
-	require.NoError(t, err)
-	defer querier.Close()
-
-	matcher, err := labels.NewMatcher(labels.MatchEqual, model.MetricNameLabel, "a_plus_one")
-	require.NoError(t, err)
-
-	set := querier.Select(ctx, false, nil, matcher)
-	samples, err := readSeriesSet(set)
-	require.NoError(t, err)
-
-	metric := labels.FromStrings(model.MetricNameLabel, "a_plus_one").String()
-	metricSample, ok := samples[metric]
-
-	require.True(t, ok, "Series %s not returned.", metric)
-	require.True(t, value.IsStaleNaN(metricSample[2].F), "Appended second sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(metricSample[2].F))
-	metricSample[2].F = 42 // require.Equal cannot handle NaN.
-
-	want := map[string][]promql.FPoint{
-		metric: {{T: 0, F: 2}, {T: 1000, F: 3}, {T: 2000, F: 42}},
-	}
-
-	require.Equal(t, want, samples)
 }
 
 // Convert a SeriesSet into a form usable with require.Equal.
@@ -609,6 +623,46 @@ func readSeriesSet(ss storage.SeriesSet) (map[string][]promql.FPoint, error) {
 	return result, ss.Err()
 }
 
+func TestGroup_QueryOffset(t *testing.T) {
+	config := `
+groups:
+  - name: group1
+    query_offset: 2m
+  - name: group2
+    query_offset: 0s
+  - name: group3
+`
+
+	dir := t.TempDir()
+	fname := path.Join(dir, "rules.yaml")
+	err := os.WriteFile(fname, []byte(config), fs.ModePerm)
+	require.NoError(t, err)
+
+	m := NewManager(&ManagerOptions{
+		Logger: log.NewNopLogger(),
+		DefaultRuleQueryOffset: func() time.Duration {
+			return time.Minute
+		},
+	})
+	m.start()
+	err = m.Update(time.Second, []string{fname}, labels.EmptyLabels(), "", nil)
+	require.NoError(t, err)
+
+	rgs := m.RuleGroups()
+	sort.Slice(rgs, func(i, j int) bool {
+		return rgs[i].Name() < rgs[j].Name()
+	})
+
+	// From config.
+	require.Equal(t, 2*time.Minute, rgs[0].QueryOffset())
+	// Setting 0 in config is detected.
+	require.Equal(t, time.Duration(0), rgs[1].QueryOffset())
+	// Default when nothing is set.
+	require.Equal(t, time.Minute, rgs[2].QueryOffset())
+
+	m.Stop()
+}
+
 func TestCopyState(t *testing.T) {
 	oldGroup := &Group{
 		rules: []Rule{
diff --git a/rules/origin_test.go b/rules/origin_test.go
index ca466301d..75c83f9a4 100644
--- a/rules/origin_test.go
+++ b/rules/origin_test.go
@@ -31,7 +31,7 @@ type unknownRule struct{}
 
 func (u unknownRule) Name() string          { return "" }
 func (u unknownRule) Labels() labels.Labels { return labels.EmptyLabels() }
-func (u unknownRule) Eval(context.Context, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error) {
+func (u unknownRule) Eval(context.Context, time.Duration, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error) {
 	return nil, nil
 }
 func (u unknownRule) String() string { return "" }
diff --git a/rules/recording.go b/rules/recording.go
index e2b0a31a0..17a75fdd1 100644
--- a/rules/recording.go
+++ b/rules/recording.go
@@ -77,10 +77,9 @@ func (rule *RecordingRule) Labels() labels.Labels {
 }
 
 // Eval evaluates the rule and then overrides the metric names and labels accordingly.
-func (rule *RecordingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, _ *url.URL, limit int) (promql.Vector, error) {
+func (rule *RecordingRule) Eval(ctx context.Context, queryOffset time.Duration, ts time.Time, query QueryFunc, _ *url.URL, limit int) (promql.Vector, error) {
 	ctx = NewOriginContext(ctx, NewRuleDetail(rule))
-
-	vector, err := query(ctx, rule.vector.String(), ts)
+	vector, err := query(ctx, rule.vector.String(), ts.Add(-queryOffset))
 	if err != nil {
 		return nil, err
 	}
diff --git a/rules/recording_test.go b/rules/recording_test.go
index 49f37b1ac..fdddd4e02 100644
--- a/rules/recording_test.go
+++ b/rules/recording_test.go
@@ -126,7 +126,7 @@ func TestRuleEval(t *testing.T) {
 	for _, scenario := range ruleEvalTestScenarios {
 		t.Run(scenario.name, func(t *testing.T) {
 			rule := NewRecordingRule("test_rule", scenario.expr, scenario.ruleLabels)
-			result, err := rule.Eval(context.TODO(), ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0)
+			result, err := rule.Eval(context.TODO(), 0, ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0)
 			require.NoError(t, err)
 			testutil.RequireEqual(t, scenario.expected, result)
 		})
@@ -144,7 +144,7 @@ func BenchmarkRuleEval(b *testing.B) {
 	b.ResetTimer()
 
 	for i := 0; i < b.N; i++ {
-		_, err := rule.Eval(context.TODO(), ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0)
+		_, err := rule.Eval(context.TODO(), 0, ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0)
 		if err != nil {
 			require.NoError(b, err)
 		}
@@ -173,7 +173,7 @@ func TestRuleEvalDuplicate(t *testing.T) {
 
 	expr, _ := parser.ParseExpr(`vector(0) or label_replace(vector(0),"test","x","","")`)
 	rule := NewRecordingRule("foo", expr, labels.FromStrings("test", "test"))
-	_, err := rule.Eval(ctx, now, EngineQueryFunc(engine, storage), nil, 0)
+	_, err := rule.Eval(ctx, 0, now, EngineQueryFunc(engine, storage), nil, 0)
 	require.Error(t, err)
 	require.EqualError(t, err, "vector contains metrics with the same labelset after applying rule labels")
 }
@@ -215,7 +215,7 @@ func TestRecordingRuleLimit(t *testing.T) {
 	evalTime := time.Unix(0, 0)
 
 	for _, test := range tests {
-		switch _, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); {
+		switch _, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); {
 		case err != nil:
 			require.EqualError(t, err, test.err)
 		case test.err != "":
@@ -243,7 +243,7 @@ func TestRecordingEvalWithOrigin(t *testing.T) {
 	require.NoError(t, err)
 
 	rule := NewRecordingRule(name, expr, lbs)
-	_, err = rule.Eval(ctx, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) {
+	_, err = rule.Eval(ctx, 0, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) {
 		detail = FromOriginContext(ctx)
 		return nil, nil
 	}, nil, 0)
diff --git a/rules/rule.go b/rules/rule.go
index 59af3e0bb..687c03d00 100644
--- a/rules/rule.go
+++ b/rules/rule.go
@@ -40,7 +40,7 @@ type Rule interface {
 	// Labels of the rule.
 	Labels() labels.Labels
 	// Eval evaluates the rule, including any associated recording or alerting actions.
-	Eval(context.Context, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error)
+	Eval(ctx context.Context, queryOffset time.Duration, evaluationTime time.Time, queryFunc QueryFunc, externalURL *url.URL, limit int) (promql.Vector, error)
 	// String returns a human-readable string representation of the rule.
 	String() string
 	// Query returns the rule query expression.
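The `Rule.Eval` signature change above is what threads a group's query offset through every implementation: the rule is still evaluated (and its output stamped) at `ts`, but the underlying query is executed at `ts - queryOffset`, as `RecordingRule.Eval` now does via `query(ctx, rule.vector.String(), ts.Add(-queryOffset))`. A minimal, self-contained sketch of that pattern follows; the `queryFunc` type and `evalWithOffset` helper are hypothetical stand-ins, not the actual `rules` package API:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// queryFunc mirrors the shape of rules.QueryFunc: evaluate the expression qs
// at time ts. The scalar result is a simplification of promql.Vector.
type queryFunc func(ctx context.Context, qs string, ts time.Time) (float64, error)

// evalWithOffset is a hypothetical helper showing the offset pattern: the
// query runs at ts-queryOffset, but the returned sample keeps the evaluation
// timestamp ts.
func evalWithOffset(ctx context.Context, queryOffset time.Duration, ts time.Time, query queryFunc, qs string) (float64, time.Time, error) {
	v, err := query(ctx, qs, ts.Add(-queryOffset))
	if err != nil {
		return 0, time.Time{}, err
	}
	return v, ts, nil
}

func main() {
	// A fake backend that simply reports the timestamp it was queried at.
	q := func(_ context.Context, _ string, ts time.Time) (float64, error) {
		return float64(ts.Unix()), nil
	}
	now := time.Unix(1700000000, 0).UTC()
	v, stamped, _ := evalWithOffset(context.Background(), time.Minute, now, q, "a + 1")
	fmt.Printf("queried at %s, stamped at %s\n", time.Unix(int64(v), 0).UTC(), stamped)
}
```

Keeping the offset out of the stored timestamp is the point of the feature: a group can evaluate against slightly older data (for example, to tolerate remote-write or ingestion delay) without shifting the series it produces, which is also why the tests above fold `queryOffset` into `activeAtDiff` instead of changing the expected sample timestamps.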