diff --git a/.github/workflows/buf-lint.yml b/.github/workflows/buf-lint.yml index bf7f681b6..463a725b7 100644 --- a/.github/workflows/buf-lint.yml +++ b/.github/workflows/buf-lint.yml @@ -12,7 +12,7 @@ jobs: name: lint runs-on: ubuntu-latest steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: bufbuild/buf-setup-action@62ee92603c244ad0da98bab36a834a999a5329e6 # v1.43.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml index 669305ebd..ce2014ecf 100644 --- a/.github/workflows/buf.yml +++ b/.github/workflows/buf.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest if: github.repository_owner == 'prometheus' steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: bufbuild/buf-setup-action@62ee92603c244ad0da98bab36a834a999a5329e6 # v1.43.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2714211dd..b7074a887 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,12 +13,12 @@ jobs: # should also be updated. image: quay.io/prometheus/golang-builder:1.23-base steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 - uses: ./.github/promci/actions/setup_environment with: enable_npm: true - - run: make GOOPTS=--tags=stringlabels GO_ONLY=1 SKIP_GOLANGCI_LINT=1 test-flags="" + - run: make GOOPTS=--tags=stringlabels GO_ONLY=1 SKIP_GOLANGCI_LINT=1 - run: go test --tags=stringlabels ./tsdb/ -test.tsdb-isolation=false - run: make -C documentation/examples/remote_storage - run: make -C documentation/examples @@ -29,8 +29,8 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.23-base steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 - uses: ./.github/promci/actions/setup_environment - run: go test --tags=dedupelabels ./... - run: GOARCH=386 go test ./cmd/prometheus @@ -48,7 +48,7 @@ jobs: # The go version in this image should be N-1 wrt test_go. image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - run: make build # Don't run NPM build; don't run race-detector. - run: make test GO_ONLY=1 test-flags="" @@ -62,8 +62,8 @@ jobs: image: quay.io/prometheus/golang-builder:1.23-base steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 - uses: ./.github/promci/actions/setup_environment with: enable_go: false @@ -79,7 +79,7 @@ jobs: name: Go tests on Windows runs-on: windows-latest steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 with: go-version: 1.23.x @@ -96,7 +96,7 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.23-base steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - run: go install ./cmd/promtool/. - run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest - run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest @@ -121,8 +121,8 @@ jobs: matrix: thread: [ 0, 1, 2 ] steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 - uses: ./.github/promci/actions/build with: promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386" @@ -146,8 +146,8 @@ jobs: # Whenever the Go version is updated here, .promu.yml # should also be updated. steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 - uses: ./.github/promci/actions/build with: parallelism: 12 @@ -169,7 +169,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install Go uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 with: @@ -182,7 +182,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install Go uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 with: @@ -191,11 +191,11 @@ jobs: run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' - name: Lint - uses: golangci/golangci-lint-action@aaa42aa0628b4ae2578232a66b541047968fac86 # v6.1.0 + uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1 with: args: --verbose # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml. - version: v1.60.2 + version: v1.61.0 fuzzing: uses: ./.github/workflows/fuzzing.yml if: github.event_name == 'pull_request' @@ -208,8 +208,8 @@ jobs: needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all] if: github.event_name == 'push' && github.event.ref == 'refs/heads/main' steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 - uses: ./.github/promci/actions/publish_main with: docker_hub_login: ${{ secrets.docker_hub_login }} @@ -225,8 +225,8 @@ jobs: || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.')) steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 - uses: ./.github/promci/actions/publish_release with: docker_hub_login: ${{ secrets.docker_hub_login }} @@ -240,10 +240,10 @@ jobs: needs: [test_ui, codeql] steps: - name: Checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - - uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5 - name: Install nodejs - uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4 + uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 with: node-version-file: "web/ui/.nvmrc" registry-url: "https://registry.npmjs.org" diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 77fbd4daf..9002f4c8e 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -24,7 +24,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Initialize CodeQL uses: github/codeql-action/init@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10 diff --git a/.github/workflows/container_description.yml b/.github/workflows/container_description.yml index 144859486..dcca16ff3 100644 --- a/.github/workflows/container_description.yml +++ b/.github/workflows/container_description.yml @@ -18,7 +18,7 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Set docker hub repo name run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV - name: Push README to Dockerhub @@ -40,7 +40,7 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Set quay.io org name run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV - name: Set quay.io repo name diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 80356e45b..5f1b0f25c 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -21,7 +21,7 @@ jobs: fuzz-seconds: 600 dry-run: false - name: Upload Crash - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff --git a/.github/workflows/repo_sync.yml b/.github/workflows/repo_sync.yml index aa306c46d..a659d431d 100644 --- a/.github/workflows/repo_sync.yml +++ b/.github/workflows/repo_sync.yml @@ -13,7 +13,7 @@ jobs: container: image: quay.io/prometheus/golang-builder steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - run: ./scripts/sync_repo_files.sh env: GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }} diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index c63727f7f..4586f4617 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -21,7 +21,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # tag=v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2 with: persist-credentials: false @@ -37,7 +37,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # tag=v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # tag=v4.4.3 with: name: SARIF file path: results.sarif diff --git a/.golangci.yml b/.golangci.yml index c512101e1..dfc74139f 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -109,7 +109,7 @@ linters-settings: extra-rules: true perfsprint: # Optimizes `fmt.Errorf`. - errorf: false + errorf: true revive: # By default, revive will enable only the linting rules that are named in the configuration file. # So, it's needed to explicitly enable all required rules here. diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a06785d2..2163c3b0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,18 @@ ## unreleased +* [CHANGE] Scraping: Remove implicit fallback to the Prometheus text format in case of invalid/missing Content-Type and fail the scrape instead. Add ability to specify a `fallback_scrape_protocol` in the scrape config. #15136 +* [CHANGE] Remote-write: default enable_http2 to false. #15219 +* [CHANGE] Scraping: normalize "le" and "quantile" label values upon ingestion. #15164 +* [CHANGE] Scraping: config `scrape_classic_histograms` was renamed to `always_scrape_classic_histograms`. #15178 +* [CHANGE] Config: remove expand-external-labels flag, expand external labels env vars by default. #14657 +* [CHANGE] Disallow configuring AM with the v1 api. #13883 +* [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710 +* [ENHANCEMENT] Tools: add debug printouts to promtool rules unit testing #15196 +* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694 +* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 +* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 +* [BUGFIX] OTLP receiver: Preserve colons when generating metric names in suffix adding mode (this mode is always enabled, unless one uses Prometheus as a library). #15251 * [BUGFIX] Clamp functions: Ignore any points with native histograms. #15169 ## 3.0.0-beta.1 / 2024-10-09 @@ -18,7 +30,6 @@ * [ENHANCEMENT] PromQL: Introduce exponential interpolation for native histograms. #14677 * [ENHANCEMENT] TSDB: Add support for ingestion of out-of-order native histogram samples. #14850, #14546 * [ENHANCEMENT] Alerts: remove metrics for removed Alertmanagers. #13909 -* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694 * [ENHANCEMENT] Kubernetes SD: Support sidecar containers in endpoint discovery. #14929 * [ENHANCEMENT] Consul SD: Support catalog filters. #11224 * [PERF] TSDB: Parallelize deletion of postings after head compaction. #14975 @@ -50,12 +61,17 @@ As is traditional with a beta release, we do **not** recommend users install 3.0 * [CHANGE] Remove deprecated `remote-write-receiver`,`promql-at-modifier`, and `promql-negative-offset` feature flags. #13456, #14526 * [CHANGE] Remove deprecated `storage.tsdb.allow-overlapping-blocks`, `alertmanager.timeout`, and `storage.tsdb.retention` flags. #14640, #14643 * [ENHANCEMENT] Move AM discovery page from "Monitoring status" to "Server status". #14875 +* [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769 * [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029 -## 2.55.0-rc.0 / 2024-09-20 +## 2.55.1 / 2024-01-04 +* [BUGFIX] `round()` function did not remove `__name__` label. #15250 + +## 2.55.0 / 2024-10-22 + +* [FEATURE] PromQL: Add experimental `info` function. #14495 * [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727 -* [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769 * [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817 * [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815 * [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734 @@ -63,19 +79,21 @@ As is traditional with a beta release, we do **not** recommend users install 3.0 * [FEATURE] Remote-Write: Support Google Cloud Monitoring authorization. #14346 * [FEATURE] Promtool: `tsdb create-blocks` new option to add labels. #14403 * [FEATURE] Promtool: `promtool test` adds `--junit` flag to format results. #14506 +* [FEATURE] TSDB: Add `delayed-compaction` feature flag, for people running many Prometheus to randomize timing. #12532 * [ENHANCEMENT] OTLP receiver: Warn on exponential histograms with zero count and non-zero sum. #14706 * [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612 * [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379 * [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450 * [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477 -* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655 +* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655, #14985 * [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621 * [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413 * [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816 * [ENHANCEMENT] API: Support multiple listening addresses. #14665 * [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934 -* [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948 +* [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948, #15120 * [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks. #14729 +* [BUGFIX] PromQL: make sort_by_label stable. #14985 * [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). #13147 * [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622 * [BUGFIX] Scraping: exemplars could be dropped in protobuf scraping. #14810 diff --git a/Dockerfile b/Dockerfile index b96b3b765..1e46a62f9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,7 @@ ARG ARCH="amd64" ARG OS="linux" FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest LABEL maintainer="The Prometheus Authors " +LABEL org.opencontainers.image.source="https://github.com/prometheus/prometheus" ARG ARCH="amd64" ARG OS="linux" diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index b84b4edf6..974cd7edb 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -190,21 +190,20 @@ type flagConfig struct { queryConcurrency int queryMaxSamples int RemoteFlushDeadline model.Duration - nameEscapingScheme string maxNotificationsSubscribers int enableAutoReload bool autoReloadInterval model.Duration - featureList []string - memlimitRatio float64 + memlimitEnable bool + memlimitRatio float64 + + featureList []string // These options are extracted from featureList // for ease of use. - enableExpandExternalLabels bool - enablePerStepStats bool - enableAutoGOMAXPROCS bool - enableAutoGOMEMLIMIT bool - enableConcurrentRuleEval bool + enablePerStepStats bool + enableAutoGOMAXPROCS bool + enableConcurrentRuleEval bool prometheusURL string corsRegexString string @@ -220,9 +219,6 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error { opts := strings.Split(f, ",") for _, o := range opts { switch o { - case "expand-external-labels": - c.enableExpandExternalLabels = true - logger.Info("Experimental expand-external-labels enabled") case "exemplar-storage": c.tsdb.EnableExemplarStorage = true logger.Info("Experimental in-memory exemplar storage enabled") @@ -247,9 +243,6 @@ func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error { c.autoReloadInterval, _ = model.ParseDuration("1s") } logger.Info("Enabled automatic configuration file reloading. Checking for configuration changes every", "interval", c.autoReloadInterval) - case "auto-gomemlimit": - c.enableAutoGOMEMLIMIT = true - logger.Info("Automatically set GOMEMLIMIT to match Linux container or system memory limit") case "concurrent-rule-eval": c.enableConcurrentRuleEval = true logger.Info("Experimental concurrent rule evaluation enabled.") @@ -336,6 +329,8 @@ func main() { a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry. Can be repeated."). Default("0.0.0.0:9090").StringsVar(&cfg.web.ListenAddresses) + a.Flag("auto-gomemlimit", "Automatically set GOMEMLIMIT to match Linux container or system memory limit"). + Default("true").BoolVar(&cfg.memlimitEnable) a.Flag("auto-gomemlimit.ratio", "The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory"). Default("0.9").FloatVar(&cfg.memlimitRatio) @@ -437,6 +432,9 @@ func main() { serverOnlyFlag(a, "storage.tsdb.samples-per-chunk", "Target number of samples per chunk."). Default("120").Hidden().IntVar(&cfg.tsdb.SamplesPerChunk) + serverOnlyFlag(a, "storage.tsdb.delayed-compaction.max-percent", "Sets the upper limit for the random compaction delay, specified as a percentage of the head chunk range. 100 means the compaction can be delayed by up to the entire head chunk range. Only effective when the delayed-compaction feature flag is enabled."). + Default("10").Hidden().IntVar(&cfg.tsdb.CompactionDelayMaxPercent) + agentOnlyFlag(a, "storage.agent.path", "Base path for metrics storage."). Default("data-agent/").StringVar(&cfg.agentStoragePath) @@ -516,7 +514,7 @@ func main() { a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates."). Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval) - a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). + a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). Default("").StringsVar(&cfg.featureList) a.Flag("agent", "Run Prometheus in 'Agent mode'.").BoolVar(&agentMode) @@ -552,15 +550,6 @@ func main() { os.Exit(1) } - if cfg.nameEscapingScheme != "" { - scheme, err := model.ToEscapingScheme(cfg.nameEscapingScheme) - if err != nil { - fmt.Fprintf(os.Stderr, `Invalid name escaping scheme: %q; Needs to be one of "values", "underscores", or "dots"`, cfg.nameEscapingScheme) - os.Exit(1) - } - model.NameEscapingScheme = scheme - } - if agentMode && len(serverOnlyFlags) > 0 { fmt.Fprintf(os.Stderr, "The following flag(s) can not be used in agent mode: %q", serverOnlyFlags) os.Exit(3) @@ -595,7 +584,7 @@ func main() { // Throw error for invalid config before starting other components. var cfgFile *config.Config - if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, false, promslog.NewNopLogger()); err != nil { + if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, promslog.NewNopLogger()); err != nil { absPath, pathErr := filepath.Abs(cfg.configFile) if pathErr != nil { absPath = cfg.configFile @@ -667,6 +656,12 @@ func main() { cfg.tsdb.MaxBlockDuration = maxBlockDuration } + + // Delayed compaction checks + if cfg.tsdb.EnableDelayedCompaction && (cfg.tsdb.CompactionDelayMaxPercent > 100 || cfg.tsdb.CompactionDelayMaxPercent <= 0) { + logger.Warn("The --storage.tsdb.delayed-compaction.max-percent should have a value between 1 and 100. Using default", "default", tsdb.DefaultCompactionDelayMaxPercent) + cfg.tsdb.CompactionDelayMaxPercent = tsdb.DefaultCompactionDelayMaxPercent + } } noStepSubqueryInterval := &safePromQLNoStepSubqueryInterval{} @@ -770,7 +765,7 @@ func main() { } } - if cfg.enableAutoGOMEMLIMIT { + if cfg.memlimitEnable { if _, err := memlimit.SetGoMemLimitWithOpts( memlimit.WithRatio(cfg.memlimitRatio), memlimit.WithProvider( @@ -1145,7 +1140,7 @@ func main() { for { select { case <-hup: - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { logger.Error("Error reloading config", "err", err) } else if cfg.enableAutoReload { if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil { @@ -1155,7 +1150,7 @@ func main() { } } case rc := <-webHandler.Reload(): - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { logger.Error("Error reloading config", "err", err) rc <- err } else { @@ -1180,7 +1175,7 @@ func main() { } logger.Info("Configuration file change detected, reloading the configuration.") - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { logger.Error("Error reloading config", "err", err) } else { checksum = currentChecksum @@ -1210,7 +1205,7 @@ func main() { return nil } - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, func(bool) {}, reloaders...); err != nil { + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, func(bool) {}, reloaders...); err != nil { return fmt.Errorf("error loading config from %q: %w", cfg.configFile, err) } @@ -1437,7 +1432,7 @@ type reloader struct { reloader func(*config.Config) error } -func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage bool, logger *slog.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, callback func(bool), rls ...reloader) (err error) { +func reloadConfig(filename string, enableExemplarStorage bool, logger *slog.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, callback func(bool), rls ...reloader) (err error) { start := time.Now() timingsLogger := logger logger.Info("Loading configuration file", "filename", filename) @@ -1453,7 +1448,7 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b } }() - conf, err := config.LoadFile(filename, agentMode, expandExternalLabels, logger) + conf, err := config.LoadFile(filename, agentMode, logger) if err != nil { return fmt.Errorf("couldn't load configuration (--config.file=%q): %w", filename, err) } @@ -1643,6 +1638,9 @@ func (s *readyStorage) Appender(ctx context.Context) storage.Appender { type notReadyAppender struct{} +// SetOptions does nothing in this appender implementation. +func (n notReadyAppender) SetOptions(opts *storage.AppendOptions) {} + func (n notReadyAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { return 0, tsdb.ErrNotReady } @@ -1797,6 +1795,7 @@ type tsdbOptions struct { EnableMemorySnapshotOnShutdown bool EnableNativeHistograms bool EnableDelayedCompaction bool + CompactionDelayMaxPercent int EnableOverlappingCompaction bool EnableOOONativeHistograms bool } @@ -1821,6 +1820,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { EnableOOONativeHistograms: opts.EnableOOONativeHistograms, OutOfOrderTimeWindow: opts.OutOfOrderTimeWindow, EnableDelayedCompaction: opts.EnableDelayedCompaction, + CompactionDelayMaxPercent: opts.CompactionDelayMaxPercent, EnableOverlappingCompaction: opts.EnableOverlappingCompaction, } } diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index d0c2846be..4bd1c71b2 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -125,6 +125,7 @@ func TestFailedStartupExitCode(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() fakeInputFile := "fake-input-file" expectedExitStatus := 2 @@ -211,83 +212,125 @@ func TestWALSegmentSizeBounds(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() - for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} { - prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) + for _, tc := range []struct { + size string + exitCode int + }{ + { + size: "9MB", + exitCode: 1, + }, + { + size: "257MB", + exitCode: 1, + }, + { + size: "10", + exitCode: 2, + }, + { + size: "1GB", + exitCode: 1, + }, + { + size: "12MB", + exitCode: 0, + }, + } { + t.Run(tc.size, func(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) - // Log stderr in case of failure. - stderr, err := prom.StderrPipe() - require.NoError(t, err) - go func() { - slurp, _ := io.ReadAll(stderr) - t.Log(string(slurp)) - }() + // Log stderr in case of failure. + stderr, err := prom.StderrPipe() + require.NoError(t, err) + go func() { + slurp, _ := io.ReadAll(stderr) + t.Log(string(slurp)) + }() - err = prom.Start() - require.NoError(t, err) + err = prom.Start() + require.NoError(t, err) - if expectedExitStatus == 0 { - done := make(chan error, 1) - go func() { done <- prom.Wait() }() - select { - case err := <-done: - require.Fail(t, "prometheus should be still running: %v", err) - case <-time.After(startupTime): - prom.Process.Kill() - <-done + if tc.exitCode == 0 { + done := make(chan error, 1) + go func() { done <- prom.Wait() }() + select { + case err := <-done: + require.Fail(t, "prometheus should be still running: %v", err) + case <-time.After(startupTime): + prom.Process.Kill() + <-done + } + return } - continue - } - err = prom.Wait() - require.Error(t, err) - var exitError *exec.ExitError - require.ErrorAs(t, err, &exitError) - status := exitError.Sys().(syscall.WaitStatus) - require.Equal(t, expectedExitStatus, status.ExitStatus()) + err = prom.Wait() + require.Error(t, err) + var exitError *exec.ExitError + require.ErrorAs(t, err, &exitError) + status := exitError.Sys().(syscall.WaitStatus) + require.Equal(t, tc.exitCode, status.ExitStatus()) + }) } } func TestMaxBlockChunkSegmentSizeBounds(t *testing.T) { - t.Parallel() - if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() - for size, expectedExitStatus := range map[string]int{"512KB": 1, "1MB": 0} { - prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) + for _, tc := range []struct { + size string + exitCode int + }{ + { + size: "512KB", + exitCode: 1, + }, + { + size: "1MB", + exitCode: 0, + }, + } { + t.Run(tc.size, func(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) - // Log stderr in case of failure. - stderr, err := prom.StderrPipe() - require.NoError(t, err) - go func() { - slurp, _ := io.ReadAll(stderr) - t.Log(string(slurp)) - }() + // Log stderr in case of failure. + stderr, err := prom.StderrPipe() + require.NoError(t, err) + go func() { + slurp, _ := io.ReadAll(stderr) + t.Log(string(slurp)) + }() - err = prom.Start() - require.NoError(t, err) + err = prom.Start() + require.NoError(t, err) - if expectedExitStatus == 0 { - done := make(chan error, 1) - go func() { done <- prom.Wait() }() - select { - case err := <-done: - require.Fail(t, "prometheus should be still running: %v", err) - case <-time.After(startupTime): - prom.Process.Kill() - <-done + if tc.exitCode == 0 { + done := make(chan error, 1) + go func() { done <- prom.Wait() }() + select { + case err := <-done: + require.Fail(t, "prometheus should be still running: %v", err) + case <-time.After(startupTime): + prom.Process.Kill() + <-done + } + return } - continue - } - err = prom.Wait() - require.Error(t, err) - var exitError *exec.ExitError - require.ErrorAs(t, err, &exitError) - status := exitError.Sys().(syscall.WaitStatus) - require.Equal(t, expectedExitStatus, status.ExitStatus()) + err = prom.Wait() + require.Error(t, err) + var exitError *exec.ExitError + require.ErrorAs(t, err, &exitError) + status := exitError.Sys().(syscall.WaitStatus) + require.Equal(t, tc.exitCode, status.ExitStatus()) + }) } } @@ -353,6 +396,8 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames } func TestAgentSuccessfulStartup(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig) require.NoError(t, prom.Start()) @@ -371,6 +416,8 @@ func TestAgentSuccessfulStartup(t *testing.T) { } func TestAgentFailedStartupWithServerFlag(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) output := bytes.Buffer{} @@ -398,6 +445,8 @@ func TestAgentFailedStartupWithServerFlag(t *testing.T) { } func TestAgentFailedStartupWithInvalidConfig(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) require.NoError(t, prom.Start()) @@ -419,6 +468,7 @@ func TestModeSpecificFlags(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() testcases := []struct { mode string @@ -433,6 +483,7 @@ func TestModeSpecificFlags(t *testing.T) { for _, tc := range testcases { t.Run(fmt.Sprintf("%s mode with option %s", tc.mode, tc.arg), func(t *testing.T) { + t.Parallel() args := []string{"-test.main", tc.arg, t.TempDir(), "--web.listen-address=0.0.0.0:0"} if tc.mode == "agent" { @@ -484,6 +535,8 @@ func TestDocumentation(t *testing.T) { if runtime.GOOS == "windows" { t.SkipNow() } + t.Parallel() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() @@ -508,6 +561,8 @@ func TestDocumentation(t *testing.T) { } func TestRwProtoMsgFlagParser(t *testing.T) { + t.Parallel() + defaultOpts := config.RemoteWriteProtoMsgs{ config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2, } diff --git a/cmd/prometheus/main_unix_test.go b/cmd/prometheus/main_unix_test.go index 2011fb123..94eec27e7 100644 --- a/cmd/prometheus/main_unix_test.go +++ b/cmd/prometheus/main_unix_test.go @@ -34,6 +34,7 @@ func TestStartupInterrupt(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) diff --git a/cmd/prometheus/query_log_test.go b/cmd/prometheus/query_log_test.go index f05ad9df2..25abf5e96 100644 --- a/cmd/prometheus/query_log_test.go +++ b/cmd/prometheus/query_log_test.go @@ -456,6 +456,7 @@ func TestQueryLog(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() cwd, err := os.Getwd() require.NoError(t, err) @@ -474,6 +475,7 @@ func TestQueryLog(t *testing.T) { } t.Run(p.String(), func(t *testing.T) { + t.Parallel() p.run(t) }) } diff --git a/cmd/promtool/analyze.go b/cmd/promtool/analyze.go index c1f523de5..26e6f2188 100644 --- a/cmd/promtool/analyze.go +++ b/cmd/promtool/analyze.go @@ -34,8 +34,8 @@ import ( ) var ( - errNotNativeHistogram = fmt.Errorf("not a native histogram") - errNotEnoughData = fmt.Errorf("not enough data") + errNotNativeHistogram = errors.New("not a native histogram") + errNotEnoughData = errors.New("not enough data") outputHeader = `Bucket stats for each histogram series over time ------------------------------------------------ @@ -169,7 +169,7 @@ func querySamples(ctx context.Context, api v1.API, query string, end time.Time) matrix, ok := values.(model.Matrix) if !ok { - return nil, fmt.Errorf("query of buckets resulted in non-Matrix") + return nil, errors.New("query of buckets resulted in non-Matrix") } return matrix, nil @@ -259,7 +259,7 @@ func getBucketCountsAtTime(matrix model.Matrix, numBuckets, timeIdx int) ([]int, prev := matrix[i].Values[timeIdx] // Assume the results are nicely aligned. if curr.Timestamp != prev.Timestamp { - return counts, fmt.Errorf("matrix result is not time aligned") + return counts, errors.New("matrix result is not time aligned") } counts[i+1] = int(curr.Value - prev.Value) } diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go index 1408975df..125c9a08e 100644 --- a/cmd/promtool/backfill.go +++ b/cmd/promtool/backfill.go @@ -49,7 +49,7 @@ func getMinAndMaxTimestamps(p textparse.Parser) (int64, int64, error) { _, ts, _ := p.Series() if ts == nil { - return 0, 0, fmt.Errorf("expected timestamp for series got none") + return 0, 0, errors.New("expected timestamp for series got none") } if *ts > maxt { diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 159fae764..b52fe7cdb 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -58,6 +58,7 @@ import ( _ "github.com/prometheus/prometheus/plugins" // Register plugins. "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/promqltest" + "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/util/documentcli" ) @@ -216,6 +217,7 @@ func main() { "test-rule-file", "The unit test file.", ).Required().ExistingFiles() + testRulesDebug := testRulesCmd.Flag("debug", "Enable unit test debugging.").Default("false").Bool() testRulesDiff := testRulesCmd.Flag("diff", "[Experimental] Print colored differential output between expected & received output.").Default("false").Bool() defaultDBPath := "data/" @@ -391,6 +393,7 @@ func main() { }, *testRulesRun, *testRulesDiff, + *testRulesDebug, *testRulesFiles...), ) @@ -441,7 +444,7 @@ func checkExperimental(f bool) { } } -var errLint = fmt.Errorf("lint error") +var errLint = errors.New("lint error") type lintConfig struct { all bool @@ -575,7 +578,7 @@ func checkFileExists(fn string) error { func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, error) { fmt.Println("Checking", filename) - cfg, err := config.LoadFile(filename, agentMode, false, promslog.NewNopLogger()) + cfg, err := config.LoadFile(filename, agentMode, promslog.NewNopLogger()) if err != nil { return nil, err } @@ -895,30 +898,30 @@ func compare(a, b compareRuleType) int { func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType { var duplicates []compareRuleType - var rules compareRuleTypes + var cRules compareRuleTypes for _, group := range groups { for _, rule := range group.Rules { - rules = append(rules, compareRuleType{ + cRules = append(cRules, compareRuleType{ metric: ruleMetric(rule), - label: labels.FromMap(rule.Labels), + label: rules.FromMaps(group.Labels, rule.Labels), }) } } - if len(rules) < 2 { + if len(cRules) < 2 { return duplicates } - sort.Sort(rules) + sort.Sort(cRules) - last := rules[0] - for i := 1; i < len(rules); i++ { - if compare(last, rules[i]) == 0 { + last := cRules[0] + for i := 1; i < len(cRules); i++ { + if compare(last, cRules[i]) == 0 { // Don't add a duplicated rule multiple times. if len(duplicates) == 0 || compare(last, duplicates[len(duplicates)-1]) != 0 { - duplicates = append(duplicates, rules[i]) + duplicates = append(duplicates, cRules[i]) } } - last = rules[i] + last = cRules[i] } return duplicates diff --git a/cmd/promtool/sd.go b/cmd/promtool/sd.go index 5c00dab03..5e005bca8 100644 --- a/cmd/promtool/sd.go +++ b/cmd/promtool/sd.go @@ -41,7 +41,7 @@ type sdCheckResult struct { func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, registerer prometheus.Registerer) int { logger := promslog.New(&promslog.Config{}) - cfg, err := config.LoadFile(sdConfigFiles, false, false, logger) + cfg, err := config.LoadFile(sdConfigFiles, false, logger) if err != nil { fmt.Fprintln(os.Stderr, "Cannot load config", err) return failureExitCode diff --git a/cmd/promtool/testdata/config_with_service_discovery_files.yml b/cmd/promtool/testdata/config_with_service_discovery_files.yml index 13b6d7faf..6a550a840 100644 --- a/cmd/promtool/testdata/config_with_service_discovery_files.yml +++ b/cmd/promtool/testdata/config_with_service_discovery_files.yml @@ -6,7 +6,7 @@ scrape_configs: alerting: alertmanagers: - scheme: http - api_version: v1 + api_version: v2 file_sd_configs: - files: - nonexistent_file.yml diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index 727275aa6..8a5a72887 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -405,7 +405,7 @@ func openBlock(path, blockID string) (*tsdb.DBReadOnly, tsdb.BlockReader, error) } } - b, err := db.Block(blockID) + b, err := db.Block(blockID, tsdb.DefaultPostingsDecoderFactory) if err != nil { return nil, nil, err } @@ -662,7 +662,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb. histogramChunkSize = append(histogramChunkSize, len(chk.Bytes())) fhchk, ok := chk.(*chunkenc.FloatHistogramChunk) if !ok { - return fmt.Errorf("chunk is not FloatHistogramChunk") + return errors.New("chunk is not FloatHistogramChunk") } it := fhchk.Iterator(nil) bucketCount := 0 @@ -677,7 +677,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb. histogramChunkSize = append(histogramChunkSize, len(chk.Bytes())) hchk, ok := chk.(*chunkenc.HistogramChunk) if !ok { - return fmt.Errorf("chunk is not HistogramChunk") + return errors.New("chunk is not HistogramChunk") } it := hchk.Iterator(nil) bucketCount := 0 diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index 667e74806..78dacdc56 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -46,11 +46,11 @@ import ( // RulesUnitTest does unit testing of rules based on the unit testing files provided. // More info about the file format can be found in the docs. -func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { - return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, files...) +func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int { + return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, debug, files...) } -func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { +func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int { failed := false junit := &junitxml.JUnitXML{} @@ -60,7 +60,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, } for _, f := range files { - if errs := ruleUnitTest(f, queryOpts, run, diffFlag, junit.Suite(f)); errs != nil { + if errs := ruleUnitTest(f, queryOpts, run, diffFlag, debug, junit.Suite(f)); errs != nil { fmt.Fprintln(os.Stderr, " FAILED:") for _, e := range errs { fmt.Fprintln(os.Stderr, e.Error()) @@ -82,7 +82,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, return successExitCode } -func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool, ts *junitxml.TestSuite) []error { +func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag, debug bool, ts *junitxml.TestSuite) []error { b, err := os.ReadFile(filename) if err != nil { ts.Abort(err) @@ -131,7 +131,7 @@ func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *reg if t.Interval == 0 { t.Interval = unitTestInp.EvaluationInterval } - ers := t.test(evalInterval, groupOrderMap, queryOpts, diffFlag, unitTestInp.RuleFiles...) + ers := t.test(testname, evalInterval, groupOrderMap, queryOpts, diffFlag, debug, unitTestInp.RuleFiles...) if ers != nil { for _, e := range ers { tc.Fail(e.Error()) @@ -198,7 +198,14 @@ type testGroup struct { } // test performs the unit tests. -func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) (outErr []error) { +func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag, debug bool, ruleFiles ...string) (outErr []error) { + if debug { + testStart := time.Now() + fmt.Printf("DEBUG: Starting test %s\n", testname) + defer func() { + fmt.Printf("DEBUG: Test %s finished, took %v\n", testname, time.Since(testStart)) + }() + } // Setup testing suite. suite, err := promqltest.NewLazyLoader(tg.seriesLoadingString(), queryOpts) if err != nil { @@ -482,6 +489,32 @@ Outer: } } + if debug { + ts := tg.maxEvalTime() + // Potentially a test can be specified at a time with fractional seconds, + // which PromQL cannot represent, so round up to the next whole second. + ts = (ts + time.Second).Truncate(time.Second) + expr := fmt.Sprintf(`{__name__=~".+"}[%v]`, ts) + q, err := suite.QueryEngine().NewInstantQuery(context.Background(), suite.Queryable(), nil, expr, mint.Add(ts)) + if err != nil { + fmt.Printf("DEBUG: Failed querying, expr: %q, err: %v\n", expr, err) + return errs + } + res := q.Exec(suite.Context()) + if res.Err != nil { + fmt.Printf("DEBUG: Failed query exec, expr: %q, err: %v\n", expr, res.Err) + return errs + } + switch v := res.Value.(type) { + case promql.Matrix: + fmt.Printf("DEBUG: Dump of all data (input_series and rules) at %v:\n", ts) + fmt.Println(v.String()) + default: + fmt.Printf("DEBUG: Got unexpected type %T\n", v) + return errs + } + } + if len(errs) > 0 { return errs } diff --git a/cmd/promtool/unittest_test.go b/cmd/promtool/unittest_test.go index 9bbac28e9..9b73dcdc1 100644 --- a/cmd/promtool/unittest_test.go +++ b/cmd/promtool/unittest_test.go @@ -141,14 +141,14 @@ func TestRulesUnitTest(t *testing.T) { reuseCount[tt.want] += len(tt.args.files) } t.Run(tt.name, func(t *testing.T) { - if got := RulesUnitTest(tt.queryOpts, nil, false, tt.args.files...); got != tt.want { + if got := RulesUnitTest(tt.queryOpts, nil, false, false, tt.args.files...); got != tt.want { t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want) } }) } t.Run("Junit xml output ", func(t *testing.T) { var buf bytes.Buffer - if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, reuseFiles...); got != 1 { + if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, false, reuseFiles...); got != 1 { t.Errorf("RulesUnitTestResults() = %v, want 1", got) } var test junitxml.JUnitXML @@ -230,7 +230,7 @@ func TestRulesUnitTestRun(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := RulesUnitTest(tt.queryOpts, tt.args.run, false, tt.args.files...) + got := RulesUnitTest(tt.queryOpts, tt.args.run, false, false, tt.args.files...) require.Equal(t, tt.want, got) }) } diff --git a/config/config.go b/config/config.go index 3f35a195d..ef3ea5e67 100644 --- a/config/config.go +++ b/config/config.go @@ -17,6 +17,7 @@ import ( "errors" "fmt" "log/slog" + "mime" "net/url" "os" "path/filepath" @@ -72,7 +73,7 @@ const ( ) // Load parses the YAML input s into a Config. -func Load(s string, expandExternalLabels bool, logger *slog.Logger) (*Config, error) { +func Load(s string, logger *slog.Logger) (*Config, error) { cfg := &Config{} // If the entire config body is empty the UnmarshalYAML method is // never called. We thus have to set the DefaultConfig at the entry @@ -84,10 +85,6 @@ func Load(s string, expandExternalLabels bool, logger *slog.Logger) (*Config, er return nil, err } - if !expandExternalLabels { - return cfg, nil - } - b := labels.NewScratchBuilder(0) cfg.GlobalConfig.ExternalLabels.Range(func(v labels.Label) { newV := os.Expand(v.Value, func(s string) string { @@ -106,17 +103,31 @@ func Load(s string, expandExternalLabels bool, logger *slog.Logger) (*Config, er // Note newV can be blank. https://github.com/prometheus/prometheus/issues/11024 b.Add(v.Name, newV) }) - cfg.GlobalConfig.ExternalLabels = b.Labels() + if !b.Labels().IsEmpty() { + cfg.GlobalConfig.ExternalLabels = b.Labels() + } + + switch cfg.OTLPConfig.TranslationStrategy { + case UnderscoreEscapingWithSuffixes: + case "": + case NoUTF8EscapingWithSuffixes: + if cfg.GlobalConfig.MetricNameValidationScheme == LegacyValidationConfig { + return nil, errors.New("OTLP translation strategy NoUTF8EscapingWithSuffixes is not allowed when UTF8 is disabled") + } + default: + return nil, fmt.Errorf("unsupported OTLP translation strategy %q", cfg.OTLPConfig.TranslationStrategy) + } + return cfg, nil } // LoadFile parses the given YAML file into a Config. -func LoadFile(filename string, agentMode, expandExternalLabels bool, logger *slog.Logger) (*Config, error) { +func LoadFile(filename string, agentMode bool, logger *slog.Logger) (*Config, error) { content, err := os.ReadFile(filename) if err != nil { return nil, err } - cfg, err := Load(string(content), expandExternalLabels, logger) + cfg, err := Load(string(content), logger) if err != nil { return nil, fmt.Errorf("parsing YAML file %s: %w", filename, err) } @@ -165,13 +176,13 @@ var ( // DefaultScrapeConfig is the default scrape configuration. DefaultScrapeConfig = ScrapeConfig{ // ScrapeTimeout, ScrapeInterval and ScrapeProtocols default to the configured globals. - ScrapeClassicHistograms: false, - MetricsPath: "/metrics", - Scheme: "http", - HonorLabels: false, - HonorTimestamps: true, - HTTPClientConfig: config.DefaultHTTPClientConfig, - EnableCompression: true, + AlwaysScrapeClassicHistograms: false, + MetricsPath: "/metrics", + Scheme: "http", + HonorLabels: false, + HonorTimestamps: true, + HTTPClientConfig: config.DefaultHTTPClientConfig, + EnableCompression: true, } // DefaultAlertmanagerConfig is the default alertmanager configuration. @@ -182,13 +193,18 @@ var ( HTTPClientConfig: config.DefaultHTTPClientConfig, } + DefaultRemoteWriteHTTPClientConfig = config.HTTPClientConfig{ + FollowRedirects: true, + EnableHTTP2: false, + } + // DefaultRemoteWriteConfig is the default remote write configuration. DefaultRemoteWriteConfig = RemoteWriteConfig{ RemoteTimeout: model.Duration(30 * time.Second), ProtobufMessage: RemoteWriteProtoMsgV1, QueueConfig: DefaultQueueConfig, MetadataConfig: DefaultMetadataConfig, - HTTPClientConfig: config.DefaultHTTPClientConfig, + HTTPClientConfig: DefaultRemoteWriteHTTPClientConfig, } // DefaultQueueConfig is the default remote queue configuration. @@ -235,7 +251,9 @@ var ( } // DefaultOTLPConfig is the default OTLP configuration. - DefaultOTLPConfig = OTLPConfig{} + DefaultOTLPConfig = OTLPConfig{ + TranslationStrategy: UnderscoreEscapingWithSuffixes, + } ) // Config is the top-level configuration for Prometheus's config files. @@ -475,9 +493,22 @@ func (s ScrapeProtocol) Validate() error { return nil } +// HeaderMediaType returns the MIME mediaType for a particular ScrapeProtocol. +func (s ScrapeProtocol) HeaderMediaType() string { + if _, ok := ScrapeProtocolsHeaders[s]; !ok { + return "" + } + mediaType, _, err := mime.ParseMediaType(ScrapeProtocolsHeaders[s]) + if err != nil { + return "" + } + return mediaType +} + var ( PrometheusProto ScrapeProtocol = "PrometheusProto" PrometheusText0_0_4 ScrapeProtocol = "PrometheusText0.0.4" + PrometheusText1_0_0 ScrapeProtocol = "PrometheusText1.0.0" OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1" OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0" UTF8NamesHeader string = model.EscapingKey + "=" + model.AllowUTF8 @@ -485,6 +516,7 @@ var ( ScrapeProtocolsHeaders = map[ScrapeProtocol]string{ PrometheusProto: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited", PrometheusText0_0_4: "text/plain;version=0.0.4", + PrometheusText1_0_0: "text/plain;version=1.0.0;escaping=allow-utf-8", OpenMetricsText0_0_1: "application/openmetrics-text;version=0.0.1", OpenMetricsText1_0_0: "application/openmetrics-text;version=1.0.0", } @@ -494,6 +526,7 @@ var ( DefaultScrapeProtocols = []ScrapeProtocol{ OpenMetricsText1_0_0, OpenMetricsText0_0_1, + PrometheusText1_0_0, PrometheusText0_0_4, } @@ -505,6 +538,7 @@ var ( PrometheusProto, OpenMetricsText1_0_0, OpenMetricsText0_0_1, + PrometheusText1_0_0, PrometheusText0_0_4, } ) @@ -631,10 +665,17 @@ type ScrapeConfig struct { // The protocols to negotiate during a scrape. It tells clients what // protocol are accepted by Prometheus and with what preference (most wanted is first). // Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, - // OpenMetricsText1.0.0, PrometheusText0.0.4. + // OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4. ScrapeProtocols []ScrapeProtocol `yaml:"scrape_protocols,omitempty"` - // Whether to scrape a classic histogram that is also exposed as a native histogram. - ScrapeClassicHistograms bool `yaml:"scrape_classic_histograms,omitempty"` + // The fallback protocol to use if the Content-Type provided by the target + // is not provided, blank, or not one of the expected values. + // Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, + // OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4. + ScrapeFallbackProtocol ScrapeProtocol `yaml:"fallback_scrape_protocol,omitempty"` + // Whether to scrape a classic histogram, even if it is also exposed as a native histogram. + AlwaysScrapeClassicHistograms bool `yaml:"always_scrape_classic_histograms,omitempty"` + // Whether to convert all scraped classic histograms into a native histogram with custom buckets. + ConvertClassicHistogramsToNHCB bool `yaml:"convert_classic_histograms_to_nhcb,omitempty"` // File to which scrape failures are logged. ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"` // The HTTP resource path on which to fetch metrics from targets. @@ -782,6 +823,12 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error { return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName) } + if c.ScrapeFallbackProtocol != "" { + if err := c.ScrapeFallbackProtocol.Validate(); err != nil { + return fmt.Errorf("invalid fallback_scrape_protocol for scrape config with job name %q: %w", c.JobName, err) + } + } + switch globalConfig.MetricNameValidationScheme { case LegacyValidationConfig: case "", UTF8ValidationConfig: @@ -957,6 +1004,7 @@ func (a AlertmanagerConfigs) ToMap() map[string]*AlertmanagerConfig { // AlertmanagerAPIVersion represents a version of the // github.com/prometheus/alertmanager/api, e.g. 'v1' or 'v2'. +// 'v1' is no longer supported. type AlertmanagerAPIVersion string // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -986,7 +1034,7 @@ const ( ) var SupportedAlertmanagerAPIVersions = []AlertmanagerAPIVersion{ - AlertmanagerAPIVersionV1, AlertmanagerAPIVersionV2, + AlertmanagerAPIVersionV2, } // AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with. @@ -1038,7 +1086,7 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er c.HTTPClientConfig.Authorization != nil || c.HTTPClientConfig.OAuth2 != nil if httpClientConfigAuthEnabled && c.SigV4Config != nil { - return fmt.Errorf("at most one of basic_auth, authorization, oauth2, & sigv4 must be configured") + return errors.New("at most one of basic_auth, authorization, oauth2, & sigv4 must be configured") } // Check for users putting URLs in target groups. @@ -1368,9 +1416,20 @@ func getGoGCEnv() int { return DefaultRuntimeConfig.GoGC } +type translationStrategyOption string + +var ( + // NoUTF8EscapingWithSuffixes will keep UTF-8 characters as they are, units and type suffixes will still be added. + NoUTF8EscapingWithSuffixes translationStrategyOption = "NoUTF8EscapingWithSuffixes" + // UnderscoreEscapingWithSuffixes is the default option for translating OTLP to Prometheus. + // This option will translate all UTF-8 characters to underscores, while adding units and type suffixes. + UnderscoreEscapingWithSuffixes translationStrategyOption = "UnderscoreEscapingWithSuffixes" +) + // OTLPConfig is the configuration for writing to the OTLP endpoint. type OTLPConfig struct { - PromoteResourceAttributes []string `yaml:"promote_resource_attributes,omitempty"` + PromoteResourceAttributes []string `yaml:"promote_resource_attributes,omitempty"` + TranslationStrategy translationStrategyOption `yaml:"translation_strategy,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -1386,7 +1445,7 @@ func (c *OTLPConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { for i, attr := range c.PromoteResourceAttributes { attr = strings.TrimSpace(attr) if attr == "" { - err = errors.Join(err, fmt.Errorf("empty promoted OTel resource attribute")) + err = errors.Join(err, errors.New("empty promoted OTel resource attribute")) continue } if _, exists := seen[attr]; exists { diff --git a/config/config_test.go b/config/config_test.go index 07f071ffe..77cbf9b2e 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -142,7 +142,7 @@ var expectedConf = &Config{ }, }, FollowRedirects: true, - EnableHTTP2: true, + EnableHTTP2: false, }, }, { @@ -158,7 +158,7 @@ var expectedConf = &Config{ KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, FollowRedirects: true, - EnableHTTP2: true, + EnableHTTP2: false, }, Headers: map[string]string{"name": "value"}, }, @@ -168,6 +168,7 @@ var expectedConf = &Config{ PromoteResourceAttributes: []string{ "k8s.cluster.name", "k8s.job.name", "k8s.namespace.name", }, + TranslationStrategy: UnderscoreEscapingWithSuffixes, }, RemoteReadConfigs: []*RemoteReadConfig{ @@ -206,19 +207,20 @@ var expectedConf = &Config{ { JobName: "prometheus", - HonorLabels: true, - HonorTimestamps: true, - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, - EnableCompression: true, - BodySizeLimit: globBodySizeLimit, - SampleLimit: globSampleLimit, - TargetLimit: globTargetLimit, - LabelLimit: globLabelLimit, - LabelNameLengthLimit: globLabelNameLengthLimit, - LabelValueLengthLimit: globLabelValueLengthLimit, - ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, - ScrapeFailureLogFile: "testdata/fail_prom.log", + HonorLabels: true, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, + BodySizeLimit: globBodySizeLimit, + SampleLimit: globSampleLimit, + TargetLimit: globTargetLimit, + LabelLimit: globLabelLimit, + LabelNameLengthLimit: globLabelNameLengthLimit, + LabelValueLengthLimit: globLabelValueLengthLimit, + ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFallbackProtocol: PrometheusText0_0_4, + ScrapeFailureLogFile: "testdata/fail_prom.log", MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1500,8 +1502,13 @@ var expectedConf = &Config{ }, } +func TestYAMLNotLongerSupportedAMApi(t *testing.T) { + _, err := LoadFile("testdata/config_with_no_longer_supported_am_api_config.yml", false, promslog.NewNopLogger()) + require.Error(t, err) +} + func TestYAMLRoundtrip(t *testing.T) { - want, err := LoadFile("testdata/roundtrip.good.yml", false, false, promslog.NewNopLogger()) + want, err := LoadFile("testdata/roundtrip.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1514,7 +1521,7 @@ func TestYAMLRoundtrip(t *testing.T) { } func TestRemoteWriteRetryOnRateLimit(t *testing.T) { - want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, false, promslog.NewNopLogger()) + want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1529,7 +1536,7 @@ func TestRemoteWriteRetryOnRateLimit(t *testing.T) { func TestOTLPSanitizeResourceAttributes(t *testing.T) { t.Run("good config", func(t *testing.T) { - want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, false, promslog.NewNopLogger()) + want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1541,25 +1548,87 @@ func TestOTLPSanitizeResourceAttributes(t *testing.T) { }) t.Run("bad config", func(t *testing.T) { - _, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, false, promslog.NewNopLogger()) + _, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, promslog.NewNopLogger()) require.ErrorContains(t, err, `duplicated promoted OTel resource attribute "k8s.job.name"`) require.ErrorContains(t, err, `empty promoted OTel resource attribute`) }) } +func TestOTLPAllowUTF8(t *testing.T) { + t.Run("good config", func(t *testing.T) { + fpath := filepath.Join("testdata", "otlp_allow_utf8.good.yml") + verify := func(t *testing.T, conf *Config, err error) { + t.Helper() + require.NoError(t, err) + require.Equal(t, NoUTF8EscapingWithSuffixes, conf.OTLPConfig.TranslationStrategy) + } + + t.Run("LoadFile", func(t *testing.T) { + conf, err := LoadFile(fpath, false, promslog.NewNopLogger()) + verify(t, conf, err) + }) + t.Run("Load", func(t *testing.T) { + content, err := os.ReadFile(fpath) + require.NoError(t, err) + conf, err := Load(string(content), promslog.NewNopLogger()) + verify(t, conf, err) + }) + }) + + t.Run("incompatible config", func(t *testing.T) { + fpath := filepath.Join("testdata", "otlp_allow_utf8.incompatible.yml") + verify := func(t *testing.T, err error) { + t.Helper() + require.ErrorContains(t, err, `OTLP translation strategy NoUTF8EscapingWithSuffixes is not allowed when UTF8 is disabled`) + } + + t.Run("LoadFile", func(t *testing.T) { + _, err := LoadFile(fpath, false, promslog.NewNopLogger()) + verify(t, err) + }) + t.Run("Load", func(t *testing.T) { + content, err := os.ReadFile(fpath) + require.NoError(t, err) + _, err = Load(string(content), promslog.NewNopLogger()) + t.Log("err", err) + verify(t, err) + }) + }) + + t.Run("bad config", func(t *testing.T) { + fpath := filepath.Join("testdata", "otlp_allow_utf8.bad.yml") + verify := func(t *testing.T, err error) { + t.Helper() + require.ErrorContains(t, err, `unsupported OTLP translation strategy "Invalid"`) + } + + t.Run("LoadFile", func(t *testing.T) { + _, err := LoadFile(fpath, false, promslog.NewNopLogger()) + verify(t, err) + }) + t.Run("Load", func(t *testing.T) { + content, err := os.ReadFile(fpath) + require.NoError(t, err) + _, err = Load(string(content), promslog.NewNopLogger()) + verify(t, err) + }) + }) +} + func TestLoadConfig(t *testing.T) { // Parse a valid file that sets a global scrape timeout. This tests whether parsing // an overwritten default field in the global config permanently changes the default. - _, err := LoadFile("testdata/global_timeout.good.yml", false, false, promslog.NewNopLogger()) + _, err := LoadFile("testdata/global_timeout.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - c, err := LoadFile("testdata/conf.good.yml", false, false, promslog.NewNopLogger()) + c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger()) + require.NoError(t, err) require.Equal(t, expectedConf, c) } func TestScrapeIntervalLarger(t *testing.T) { - c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, false, promslog.NewNopLogger()) + c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) require.Len(t, c.ScrapeConfigs, 1) for _, sc := range c.ScrapeConfigs { @@ -1569,7 +1638,7 @@ func TestScrapeIntervalLarger(t *testing.T) { // YAML marshaling must not reveal authentication credentials. func TestElideSecrets(t *testing.T) { - c, err := LoadFile("testdata/conf.good.yml", false, false, promslog.NewNopLogger()) + c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) secretRe := regexp.MustCompile(`\\u003csecret\\u003e|`) @@ -1586,31 +1655,31 @@ func TestElideSecrets(t *testing.T) { func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) { // Parse a valid file that sets a rule files with an absolute path - c, err := LoadFile(ruleFilesConfigFile, false, false, promslog.NewNopLogger()) + c, err := LoadFile(ruleFilesConfigFile, false, promslog.NewNopLogger()) require.NoError(t, err) require.Equal(t, ruleFilesExpectedConf, c) } func TestKubernetesEmptyAPIServer(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, false, promslog.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } func TestKubernetesWithKubeConfig(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, false, promslog.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } func TestKubernetesSelectors(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, false, promslog.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } @@ -2080,12 +2149,20 @@ var expectedErrors = []struct { }, { filename: "scrape_config_files_scrape_protocols.bad.yml", - errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4] for scrape config with job name "node"`, + errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0] for scrape config with job name "node"`, }, { filename: "scrape_config_files_scrape_protocols2.bad.yml", errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols2.bad.yml: duplicated protocol in scrape_protocols, got [OpenMetricsText1.0.0 PrometheusProto OpenMetricsText1.0.0] for scrape config with job name "node"`, }, + { + filename: "scrape_config_files_fallback_scrape_protocol1.bad.yml", + errMsg: `parsing YAML file testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml: invalid fallback_scrape_protocol for scrape config with job name "node": unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0]`, + }, + { + filename: "scrape_config_files_fallback_scrape_protocol2.bad.yml", + errMsg: `unmarshal errors`, + }, } func TestBadConfigs(t *testing.T) { @@ -2094,7 +2171,7 @@ func TestBadConfigs(t *testing.T) { model.NameValidationScheme = model.UTF8Validation }() for _, ee := range expectedErrors { - _, err := LoadFile("testdata/"+ee.filename, false, false, promslog.NewNopLogger()) + _, err := LoadFile("testdata/"+ee.filename, false, promslog.NewNopLogger()) require.ErrorContains(t, err, ee.errMsg, "Expected error for %s to contain %q but got: %s", ee.filename, ee.errMsg, err) } @@ -2125,7 +2202,7 @@ func TestBadStaticConfigsYML(t *testing.T) { } func TestEmptyConfig(t *testing.T) { - c, err := Load("", false, promslog.NewNopLogger()) + c, err := Load("", promslog.NewNopLogger()) require.NoError(t, err) exp := DefaultConfig require.Equal(t, exp, *c) @@ -2135,38 +2212,34 @@ func TestExpandExternalLabels(t *testing.T) { // Cleanup ant TEST env variable that could exist on the system. os.Setenv("TEST", "") - c, err := LoadFile("testdata/external_labels.good.yml", false, false, promslog.NewNopLogger()) - require.NoError(t, err) - testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foo${TEST}bar", "foo", "${TEST}", "qux", "foo$${TEST}", "xyz", "foo$$bar"), c.GlobalConfig.ExternalLabels) - - c, err = LoadFile("testdata/external_labels.good.yml", false, true, promslog.NewNopLogger()) + c, err := LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foobar", "foo", "", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels) os.Setenv("TEST", "TestValue") - c, err = LoadFile("testdata/external_labels.good.yml", false, true, promslog.NewNopLogger()) + c, err = LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "fooTestValuebar", "foo", "TestValue", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels) } func TestAgentMode(t *testing.T) { - _, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, false, promslog.NewNopLogger()) + _, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field alerting is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field alerting is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field rule_files is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, false, promslog.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field remote_read is not allowed in agent mode") - c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, false, promslog.NewNopLogger()) + c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, promslog.NewNopLogger()) require.NoError(t, err) require.Empty(t, c.RemoteWriteConfigs) - c, err = LoadFile("testdata/agent_mode.good.yml", true, false, promslog.NewNopLogger()) + c, err = LoadFile("testdata/agent_mode.good.yml", true, promslog.NewNopLogger()) require.NoError(t, err) require.Len(t, c.RemoteWriteConfigs, 1) require.Equal( @@ -2177,7 +2250,7 @@ func TestAgentMode(t *testing.T) { } func TestEmptyGlobalBlock(t *testing.T) { - c, err := Load("global:\n", false, promslog.NewNopLogger()) + c, err := Load("global:\n", promslog.NewNopLogger()) require.NoError(t, err) exp := DefaultConfig exp.Runtime = DefaultRuntimeConfig @@ -2332,7 +2405,7 @@ func TestGetScrapeConfigs(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - c, err := LoadFile(tc.configFile, false, false, promslog.NewNopLogger()) + c, err := LoadFile(tc.configFile, false, promslog.NewNopLogger()) require.NoError(t, err) scfgs, err := c.GetScrapeConfigs() @@ -2350,7 +2423,7 @@ func kubernetesSDHostURL() config.URL { } func TestScrapeConfigDisableCompression(t *testing.T) { - want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, false, promslog.NewNopLogger()) + want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -2397,7 +2470,7 @@ func TestScrapeConfigNameValidationSettings(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, false, promslog.NewNopLogger()) + want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -2410,3 +2483,54 @@ func TestScrapeConfigNameValidationSettings(t *testing.T) { }) } } + +func TestScrapeProtocolHeader(t *testing.T) { + tests := []struct { + name string + proto ScrapeProtocol + expectedValue string + }{ + { + name: "blank", + proto: ScrapeProtocol(""), + expectedValue: "", + }, + { + name: "invalid", + proto: ScrapeProtocol("invalid"), + expectedValue: "", + }, + { + name: "prometheus protobuf", + proto: PrometheusProto, + expectedValue: "application/vnd.google.protobuf", + }, + { + name: "prometheus text 0.0.4", + proto: PrometheusText0_0_4, + expectedValue: "text/plain", + }, + { + name: "prometheus text 1.0.0", + proto: PrometheusText1_0_0, + expectedValue: "text/plain", + }, + { + name: "openmetrics 0.0.1", + proto: OpenMetricsText0_0_1, + expectedValue: "application/openmetrics-text", + }, + { + name: "openmetrics 1.0.0", + proto: OpenMetricsText1_0_0, + expectedValue: "application/openmetrics-text", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + mediaType := tc.proto.HeaderMediaType() + + require.Equal(t, tc.expectedValue, mediaType) + }) + } +} diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index 9eb799543..2501652d5 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -74,6 +74,8 @@ scrape_configs: # metrics_path defaults to '/metrics' # scheme defaults to 'http'. + fallback_scrape_protocol: PrometheusText0.0.4 + scrape_failure_log_file: fail_prom.log file_sd_configs: - files: diff --git a/config/testdata/config_with_deprecated_am_api_config.yml b/config/testdata/config_with_deprecated_am_api_config.yml new file mode 100644 index 000000000..ac89537ff --- /dev/null +++ b/config/testdata/config_with_deprecated_am_api_config.yml @@ -0,0 +1,7 @@ +alerting: + alertmanagers: + - scheme: http + api_version: v1 + file_sd_configs: + - files: + - nonexistent_file.yml diff --git a/config/testdata/otlp_allow_utf8.bad.yml b/config/testdata/otlp_allow_utf8.bad.yml new file mode 100644 index 000000000..488e4b055 --- /dev/null +++ b/config/testdata/otlp_allow_utf8.bad.yml @@ -0,0 +1,4 @@ +global: + metric_name_validation_scheme: legacy +otlp: + translation_strategy: Invalid diff --git a/config/testdata/otlp_allow_utf8.good.yml b/config/testdata/otlp_allow_utf8.good.yml new file mode 100644 index 000000000..f3069d2fd --- /dev/null +++ b/config/testdata/otlp_allow_utf8.good.yml @@ -0,0 +1,2 @@ +otlp: + translation_strategy: NoUTF8EscapingWithSuffixes diff --git a/config/testdata/otlp_allow_utf8.incompatible.yml b/config/testdata/otlp_allow_utf8.incompatible.yml new file mode 100644 index 000000000..2625c2413 --- /dev/null +++ b/config/testdata/otlp_allow_utf8.incompatible.yml @@ -0,0 +1,4 @@ +global: + metric_name_validation_scheme: legacy +otlp: + translation_strategy: NoUTF8EscapingWithSuffixes diff --git a/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml b/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml new file mode 100644 index 000000000..07cfe4759 --- /dev/null +++ b/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml @@ -0,0 +1,5 @@ +scrape_configs: + - job_name: node + fallback_scrape_protocol: "prometheusproto" + static_configs: + - targets: ['localhost:8080'] diff --git a/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml b/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml new file mode 100644 index 000000000..c5d133f9c --- /dev/null +++ b/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml @@ -0,0 +1,5 @@ +scrape_configs: + - job_name: node + fallback_scrape_protocol: ["OpenMetricsText1.0.0", "PrometheusText0.0.4"] + static_configs: + - targets: ['localhost:8080'] diff --git a/discovery/aws/ec2.go b/discovery/aws/ec2.go index 51eec8dba..0f35c401e 100644 --- a/discovery/aws/ec2.go +++ b/discovery/aws/ec2.go @@ -30,6 +30,7 @@ import ( "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -148,7 +149,7 @@ type EC2Discovery struct { *refresh.Discovery logger *slog.Logger cfg *EC2SDConfig - ec2 *ec2.EC2 + ec2 ec2iface.EC2API // azToAZID maps this account's availability zones to their underlying AZ // ID, e.g. eu-west-2a -> euw2-az2. Refreshes are performed sequentially, so @@ -160,7 +161,7 @@ type EC2Discovery struct { func NewEC2Discovery(conf *EC2SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) { m, ok := metrics.(*ec2Metrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { @@ -182,7 +183,7 @@ func NewEC2Discovery(conf *EC2SDConfig, logger *slog.Logger, metrics discovery.D return d, nil } -func (d *EC2Discovery) ec2Client(context.Context) (*ec2.EC2, error) { +func (d *EC2Discovery) ec2Client(context.Context) (ec2iface.EC2API, error) { if d.ec2 != nil { return d.ec2, nil } diff --git a/discovery/aws/ec2_test.go b/discovery/aws/ec2_test.go new file mode 100644 index 000000000..f34065c23 --- /dev/null +++ b/discovery/aws/ec2_test.go @@ -0,0 +1,434 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package aws + +import ( + "context" + "errors" + "testing" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/request" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + "go.uber.org/goleak" + + "github.com/prometheus/prometheus/discovery/targetgroup" +) + +// Helper function to get pointers on literals. +// NOTE: this is common between a few tests. In the future it might worth to move this out into a separate package. +func strptr(str string) *string { + return &str +} + +func boolptr(b bool) *bool { + return &b +} + +func int64ptr(i int64) *int64 { + return &i +} + +// Struct for test data. +type ec2DataStore struct { + region string + + azToAZID map[string]string + + ownerID string + + instances []*ec2.Instance +} + +// The tests itself. +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} + +func TestEC2DiscoveryRefreshAZIDs(t *testing.T) { + ctx := context.Background() + + // iterate through the test cases + for _, tt := range []struct { + name string + shouldFail bool + ec2Data *ec2DataStore + }{ + { + name: "Normal", + shouldFail: false, + ec2Data: &ec2DataStore{ + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + }, + }, + { + name: "HandleError", + shouldFail: true, + ec2Data: &ec2DataStore{}, + }, + } { + t.Run(tt.name, func(t *testing.T) { + client := newMockEC2Client(tt.ec2Data) + + d := &EC2Discovery{ + ec2: client, + } + + err := d.refreshAZIDs(ctx) + if tt.shouldFail { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, client.ec2Data.azToAZID, d.azToAZID) + } + }) + } +} + +func TestEC2DiscoveryRefresh(t *testing.T) { + ctx := context.Background() + + // iterate through the test cases + for _, tt := range []struct { + name string + ec2Data *ec2DataStore + expected []*targetgroup.Group + }{ + { + name: "NoPrivateIp", + ec2Data: &ec2DataStore{ + region: "region-noprivateip", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + InstanceId: strptr("instance-id-noprivateip"), + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-noprivateip", + }, + }, + }, + { + name: "NoVpc", + ec2Data: &ec2DataStore{ + region: "region-novpc", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + ownerID: "owner-id-novpc", + instances: []*ec2.Instance{ + { + // set every possible options and test them here + Architecture: strptr("architecture-novpc"), + ImageId: strptr("ami-novpc"), + InstanceId: strptr("instance-id-novpc"), + InstanceLifecycle: strptr("instance-lifecycle-novpc"), + InstanceType: strptr("instance-type-novpc"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")}, + Platform: strptr("platform-novpc"), + PrivateDnsName: strptr("private-dns-novpc"), + PrivateIpAddress: strptr("1.2.3.4"), + PublicDnsName: strptr("public-dns-novpc"), + PublicIpAddress: strptr("42.42.42.2"), + State: &ec2.InstanceState{Name: strptr("running")}, + // test tags once and for all + Tags: []*ec2.Tag{ + {Key: strptr("tag-1-key"), Value: strptr("tag-1-value")}, + {Key: strptr("tag-2-key"), Value: strptr("tag-2-value")}, + nil, + {Value: strptr("tag-4-value")}, + {Key: strptr("tag-5-key")}, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-novpc", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("1.2.3.4:4242"), + "__meta_ec2_ami": model.LabelValue("ami-novpc"), + "__meta_ec2_architecture": model.LabelValue("architecture-novpc"), + "__meta_ec2_availability_zone": model.LabelValue("azname-b"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-2"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-novpc"), + "__meta_ec2_instance_lifecycle": model.LabelValue("instance-lifecycle-novpc"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-novpc"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_owner_id": model.LabelValue("owner-id-novpc"), + "__meta_ec2_platform": model.LabelValue("platform-novpc"), + "__meta_ec2_private_dns_name": model.LabelValue("private-dns-novpc"), + "__meta_ec2_private_ip": model.LabelValue("1.2.3.4"), + "__meta_ec2_public_dns_name": model.LabelValue("public-dns-novpc"), + "__meta_ec2_public_ip": model.LabelValue("42.42.42.2"), + "__meta_ec2_region": model.LabelValue("region-novpc"), + "__meta_ec2_tag_tag_1_key": model.LabelValue("tag-1-value"), + "__meta_ec2_tag_tag_2_key": model.LabelValue("tag-2-value"), + }, + }, + }, + }, + }, + { + name: "Ipv4", + ec2Data: &ec2DataStore{ + region: "region-ipv4", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + // just the minimum needed for the refresh work + ImageId: strptr("ami-ipv4"), + InstanceId: strptr("instance-id-ipv4"), + InstanceType: strptr("instance-type-ipv4"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-c")}, + PrivateIpAddress: strptr("5.6.7.8"), + State: &ec2.InstanceState{Name: strptr("running")}, + SubnetId: strptr("azid-3"), + VpcId: strptr("vpc-ipv4"), + // network intefaces + NetworkInterfaces: []*ec2.InstanceNetworkInterface{ + // interface without subnet -> should be ignored + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:1::1"), + IsPrimaryIpv6: boolptr(true), + }, + }, + }, + // interface with subnet, no IPv6 + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-3"), + }, + // interface with another subnet, no IPv6 + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-1"), + }, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-ipv4", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("5.6.7.8:4242"), + "__meta_ec2_ami": model.LabelValue("ami-ipv4"), + "__meta_ec2_availability_zone": model.LabelValue("azname-c"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-3"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-ipv4"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-ipv4"), + "__meta_ec2_owner_id": model.LabelValue(""), + "__meta_ec2_primary_subnet_id": model.LabelValue("azid-3"), + "__meta_ec2_private_ip": model.LabelValue("5.6.7.8"), + "__meta_ec2_region": model.LabelValue("region-ipv4"), + "__meta_ec2_subnet_id": model.LabelValue(",azid-3,azid-1,"), + "__meta_ec2_vpc_id": model.LabelValue("vpc-ipv4"), + }, + }, + }, + }, + }, + { + name: "Ipv6", + ec2Data: &ec2DataStore{ + region: "region-ipv6", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + // just the minimum needed for the refresh work + ImageId: strptr("ami-ipv6"), + InstanceId: strptr("instance-id-ipv6"), + InstanceType: strptr("instance-type-ipv6"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")}, + PrivateIpAddress: strptr("9.10.11.12"), + State: &ec2.InstanceState{Name: strptr("running")}, + SubnetId: strptr("azid-2"), + VpcId: strptr("vpc-ipv6"), + // network intefaces + NetworkInterfaces: []*ec2.InstanceNetworkInterface{ + // interface without primary IPv6, index 2 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(3), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::1:1"), + IsPrimaryIpv6: boolptr(false), + }, + }, + SubnetId: strptr("azid-2"), + }, + // interface with primary IPv6, index 1 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(1), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::2:1"), + IsPrimaryIpv6: boolptr(false), + }, + { + Ipv6Address: strptr("2001:db8:2::2:2"), + IsPrimaryIpv6: boolptr(true), + }, + }, + SubnetId: strptr("azid-2"), + }, + // interface with primary IPv6, index 3 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(3), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::3:1"), + IsPrimaryIpv6: boolptr(true), + }, + }, + SubnetId: strptr("azid-1"), + }, + // interface without primary IPv6, index 0 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(0), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-3"), + }, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-ipv6", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("9.10.11.12:4242"), + "__meta_ec2_ami": model.LabelValue("ami-ipv6"), + "__meta_ec2_availability_zone": model.LabelValue("azname-b"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-2"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-ipv6"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-ipv6"), + "__meta_ec2_ipv6_addresses": model.LabelValue(",2001:db8:2::1:1,2001:db8:2::2:1,2001:db8:2::2:2,2001:db8:2::3:1,"), + "__meta_ec2_owner_id": model.LabelValue(""), + "__meta_ec2_primary_ipv6_addresses": model.LabelValue(",,2001:db8:2::2:2,,2001:db8:2::3:1,"), + "__meta_ec2_primary_subnet_id": model.LabelValue("azid-2"), + "__meta_ec2_private_ip": model.LabelValue("9.10.11.12"), + "__meta_ec2_region": model.LabelValue("region-ipv6"), + "__meta_ec2_subnet_id": model.LabelValue(",azid-2,azid-1,azid-3,"), + "__meta_ec2_vpc_id": model.LabelValue("vpc-ipv6"), + }, + }, + }, + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + client := newMockEC2Client(tt.ec2Data) + + d := &EC2Discovery{ + ec2: client, + cfg: &EC2SDConfig{ + Port: 4242, + Region: client.ec2Data.region, + }, + } + + g, err := d.refresh(ctx) + require.NoError(t, err) + require.Equal(t, tt.expected, g) + }) + } +} + +// EC2 client mock. +type mockEC2Client struct { + ec2iface.EC2API + ec2Data ec2DataStore +} + +func newMockEC2Client(ec2Data *ec2DataStore) *mockEC2Client { + client := mockEC2Client{ + ec2Data: *ec2Data, + } + return &client +} + +func (m *mockEC2Client) DescribeAvailabilityZonesWithContext(ctx aws.Context, input *ec2.DescribeAvailabilityZonesInput, opts ...request.Option) (*ec2.DescribeAvailabilityZonesOutput, error) { + if len(m.ec2Data.azToAZID) == 0 { + return nil, errors.New("No AZs found") + } + + azs := make([]*ec2.AvailabilityZone, len(m.ec2Data.azToAZID)) + + i := 0 + for k, v := range m.ec2Data.azToAZID { + azs[i] = &ec2.AvailabilityZone{ + ZoneName: strptr(k), + ZoneId: strptr(v), + } + i++ + } + + return &ec2.DescribeAvailabilityZonesOutput{ + AvailabilityZones: azs, + }, nil +} + +func (m *mockEC2Client) DescribeInstancesPagesWithContext(ctx aws.Context, input *ec2.DescribeInstancesInput, fn func(*ec2.DescribeInstancesOutput, bool) bool, opts ...request.Option) error { + r := ec2.Reservation{} + r.SetInstances(m.ec2Data.instances) + r.SetOwnerId(m.ec2Data.ownerID) + + o := ec2.DescribeInstancesOutput{} + o.SetReservations([]*ec2.Reservation{&r}) + + _ = fn(&o, true) + + return nil +} diff --git a/discovery/aws/lightsail.go b/discovery/aws/lightsail.go index 0b046be6d..b892867f1 100644 --- a/discovery/aws/lightsail.go +++ b/discovery/aws/lightsail.go @@ -134,7 +134,7 @@ type LightsailDiscovery struct { func NewLightsailDiscovery(conf *LightsailSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) { m, ok := metrics.(*lightsailMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 35bbc3847..ec1c51ace 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -186,7 +186,7 @@ type Discovery struct { func NewDiscovery(cfg *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*azureMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { diff --git a/discovery/consul/consul.go b/discovery/consul/consul.go index fcae7b186..cb3dfe137 100644 --- a/discovery/consul/consul.go +++ b/discovery/consul/consul.go @@ -189,7 +189,7 @@ type Discovery struct { func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*consulMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { diff --git a/discovery/digitalocean/digitalocean.go b/discovery/digitalocean/digitalocean.go index 52f3a9c57..fce8d1a35 100644 --- a/discovery/digitalocean/digitalocean.go +++ b/discovery/digitalocean/digitalocean.go @@ -15,6 +15,7 @@ package digitalocean import ( "context" + "errors" "fmt" "log/slog" "net" @@ -114,7 +115,7 @@ type Discovery struct { func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*digitaloceanMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &Discovery{ diff --git a/discovery/dns/dns.go b/discovery/dns/dns.go index 5de7f6488..405dba44f 100644 --- a/discovery/dns/dns.go +++ b/discovery/dns/dns.go @@ -121,7 +121,7 @@ type Discovery struct { func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*dnsMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { diff --git a/discovery/dns/dns_test.go b/discovery/dns/dns_test.go index 96bb32491..f01a075c4 100644 --- a/discovery/dns/dns_test.go +++ b/discovery/dns/dns_test.go @@ -15,7 +15,7 @@ package dns import ( "context" - "fmt" + "errors" "log/slog" "net" "testing" @@ -53,7 +53,7 @@ func TestDNS(t *testing.T) { Type: "A", }, lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { - return nil, fmt.Errorf("some error") + return nil, errors.New("some error") }, expected: []*targetgroup.Group{}, }, diff --git a/discovery/eureka/eureka.go b/discovery/eureka/eureka.go index 508734648..3cac667f8 100644 --- a/discovery/eureka/eureka.go +++ b/discovery/eureka/eureka.go @@ -16,7 +16,6 @@ package eureka import ( "context" "errors" - "fmt" "log/slog" "net" "net/http" @@ -129,7 +128,7 @@ type Discovery struct { func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*eurekaMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "eureka_sd") diff --git a/discovery/file/file.go b/discovery/file/file.go index 1c36b254c..beea03222 100644 --- a/discovery/file/file.go +++ b/discovery/file/file.go @@ -184,7 +184,7 @@ type Discovery struct { func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { fm, ok := metrics.(*fileMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { diff --git a/discovery/gce/gce.go b/discovery/gce/gce.go index a509a144e..9a5b0e856 100644 --- a/discovery/gce/gce.go +++ b/discovery/gce/gce.go @@ -132,7 +132,7 @@ type Discovery struct { func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*gceMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &Discovery{ diff --git a/discovery/hetzner/hetzner.go b/discovery/hetzner/hetzner.go index 980c197d7..02e227299 100644 --- a/discovery/hetzner/hetzner.go +++ b/discovery/hetzner/hetzner.go @@ -138,7 +138,7 @@ type Discovery struct { func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*hetznerMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } r, err := newRefresher(conf, logger) diff --git a/discovery/http/http.go b/discovery/http/http.go index 004a5b4ae..667fd36f6 100644 --- a/discovery/http/http.go +++ b/discovery/http/http.go @@ -41,8 +41,8 @@ import ( var ( // DefaultSDConfig is the default HTTP SD configuration. DefaultSDConfig = SDConfig{ - RefreshInterval: model.Duration(60 * time.Second), HTTPClientConfig: config.DefaultHTTPClientConfig, + RefreshInterval: model.Duration(60 * time.Second), } userAgent = fmt.Sprintf("Prometheus/%s", version.Version) matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`) @@ -86,17 +86,17 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { return err } if c.URL == "" { - return fmt.Errorf("URL is missing") + return errors.New("URL is missing") } parsedURL, err := url.Parse(c.URL) if err != nil { return err } if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" { - return fmt.Errorf("URL scheme must be 'http' or 'https'") + return errors.New("URL scheme must be 'http' or 'https'") } if parsedURL.Host == "" { - return fmt.Errorf("host is missing in URL") + return errors.New("host is missing in URL") } return c.HTTPClientConfig.Validate() } @@ -118,7 +118,7 @@ type Discovery struct { func NewDiscovery(conf *SDConfig, logger *slog.Logger, clientOpts []config.HTTPClientOption, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*httpMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { diff --git a/discovery/ionos/ionos.go b/discovery/ionos/ionos.go index 1aa21667e..1badda48c 100644 --- a/discovery/ionos/ionos.go +++ b/discovery/ionos/ionos.go @@ -15,7 +15,6 @@ package ionos import ( "errors" - "fmt" "log/slog" "time" @@ -46,7 +45,7 @@ type Discovery struct{} func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*ionosMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if conf.ionosEndpoint == "" { diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go index 75da67f1c..14d3bc7a9 100644 --- a/discovery/kubernetes/endpoints.go +++ b/discovery/kubernetes/endpoints.go @@ -102,10 +102,7 @@ func NewEndpoints(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node return } - ep := &apiv1.Endpoints{} - ep.Namespace = svc.Namespace - ep.Name = svc.Name - obj, exists, err := e.endpointsStore.Get(ep) + obj, exists, err := e.endpointsStore.GetByKey(namespacedName(svc.Namespace, svc.Name)) if exists && err == nil { e.enqueue(obj.(*apiv1.Endpoints)) } @@ -167,8 +164,11 @@ func NewEndpoints(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node e.enqueueNode(node.Name) }, DeleteFunc: func(o interface{}) { - node := o.(*apiv1.Node) - e.enqueueNode(node.Name) + nodeName, err := nodeName(o) + if err != nil { + l.Error("Error getting Node name", "err", err) + } + e.enqueueNode(nodeName) }, }) if err != nil { @@ -454,11 +454,8 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { if ref == nil || ref.Kind != "Pod" { return nil } - p := &apiv1.Pod{} - p.Namespace = ref.Namespace - p.Name = ref.Name - obj, exists, err := e.podStore.Get(p) + obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name)) if err != nil { e.logger.Error("resolving pod ref failed", "err", err) return nil @@ -470,11 +467,7 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { } func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) { - svc := &apiv1.Service{} - svc.Namespace = ns - svc.Name = name - - obj, exists, err := e.serviceStore.Get(svc) + obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name)) if err != nil { e.logger.Error("retrieving service failed", "err", err) return @@ -482,7 +475,7 @@ func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) { if !exists { return } - svc = obj.(*apiv1.Service) + svc := obj.(*apiv1.Service) tg.Labels = tg.Labels.Merge(serviceLabels(svc)) } diff --git a/discovery/kubernetes/endpoints_test.go b/discovery/kubernetes/endpoints_test.go index 4af688960..a1ac6e5d4 100644 --- a/discovery/kubernetes/endpoints_test.go +++ b/discovery/kubernetes/endpoints_test.go @@ -18,10 +18,12 @@ import ( "testing" "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -1257,3 +1259,22 @@ func TestEndpointsDiscoverySidecarContainer(t *testing.T) { }, }.Run(t) } + +func BenchmarkResolvePodRef(b *testing.B) { + indexer := cache.NewIndexer(cache.DeletionHandlingMetaNamespaceKeyFunc, nil) + e := &Endpoints{ + podStore: indexer, + } + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + p := e.resolvePodRef(&v1.ObjectReference{ + Kind: "Pod", + Name: "testpod", + Namespace: "foo", + }) + require.Nil(b, p) + } +} diff --git a/discovery/kubernetes/endpointslice.go b/discovery/kubernetes/endpointslice.go index efd1c7216..45bc43eff 100644 --- a/discovery/kubernetes/endpointslice.go +++ b/discovery/kubernetes/endpointslice.go @@ -145,8 +145,11 @@ func NewEndpointSlice(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, n e.enqueueNode(node.Name) }, DeleteFunc: func(o interface{}) { - node := o.(*apiv1.Node) - e.enqueueNode(node.Name) + nodeName, err := nodeName(o) + if err != nil { + l.Error("Error getting Node name", "err", err) + } + e.enqueueNode(nodeName) }, }) if err != nil { @@ -464,11 +467,8 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { if ref == nil || ref.Kind != "Pod" { return nil } - p := &apiv1.Pod{} - p.Namespace = ref.Namespace - p.Name = ref.Name - obj, exists, err := e.podStore.Get(p) + obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name)) if err != nil { e.logger.Error("resolving pod ref failed", "err", err) return nil @@ -481,19 +481,19 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { func (e *EndpointSlice) addServiceLabels(esa endpointSliceAdaptor, tg *targetgroup.Group) { var ( - svc = &apiv1.Service{} found bool + name string ) - svc.Namespace = esa.namespace() + ns := esa.namespace() // Every EndpointSlice object has the Service they belong to in the // kubernetes.io/service-name label. - svc.Name, found = esa.labels()[esa.labelServiceName()] + name, found = esa.labels()[esa.labelServiceName()] if !found { return } - obj, exists, err := e.serviceStore.Get(svc) + obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name)) if err != nil { e.logger.Error("retrieving service failed", "err", err) return @@ -501,7 +501,7 @@ func (e *EndpointSlice) addServiceLabels(esa endpointSliceAdaptor, tg *targetgro if !exists { return } - svc = obj.(*apiv1.Service) + svc := obj.(*apiv1.Service) tg.Labels = tg.Labels.Merge(serviceLabels(svc)) } diff --git a/discovery/kubernetes/kubernetes.go b/discovery/kubernetes/kubernetes.go index be1c77c20..9aff89e4f 100644 --- a/discovery/kubernetes/kubernetes.go +++ b/discovery/kubernetes/kubernetes.go @@ -173,7 +173,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { return err } if c.Role == "" { - return fmt.Errorf("role missing (one of: pod, service, endpoints, endpointslice, node, ingress)") + return errors.New("role missing (one of: pod, service, endpoints, endpointslice, node, ingress)") } err = c.HTTPClientConfig.Validate() if err != nil { @@ -181,20 +181,20 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { } if c.APIServer.URL != nil && c.KubeConfig != "" { // Api-server and kubeconfig_file are mutually exclusive - return fmt.Errorf("cannot use 'kubeconfig_file' and 'api_server' simultaneously") + return errors.New("cannot use 'kubeconfig_file' and 'api_server' simultaneously") } if c.KubeConfig != "" && !reflect.DeepEqual(c.HTTPClientConfig, config.DefaultHTTPClientConfig) { // Kubeconfig_file and custom http config are mutually exclusive - return fmt.Errorf("cannot use a custom HTTP client configuration together with 'kubeconfig_file'") + return errors.New("cannot use a custom HTTP client configuration together with 'kubeconfig_file'") } if c.APIServer.URL == nil && !reflect.DeepEqual(c.HTTPClientConfig, config.DefaultHTTPClientConfig) { - return fmt.Errorf("to use custom HTTP client configuration please provide the 'api_server' URL explicitly") + return errors.New("to use custom HTTP client configuration please provide the 'api_server' URL explicitly") } if c.APIServer.URL != nil && c.NamespaceDiscovery.IncludeOwnNamespace { - return fmt.Errorf("cannot use 'api_server' and 'namespaces.own_namespace' simultaneously") + return errors.New("cannot use 'api_server' and 'namespaces.own_namespace' simultaneously") } if c.KubeConfig != "" && c.NamespaceDiscovery.IncludeOwnNamespace { - return fmt.Errorf("cannot use 'kubeconfig_file' and 'namespaces.own_namespace' simultaneously") + return errors.New("cannot use 'kubeconfig_file' and 'namespaces.own_namespace' simultaneously") } foundSelectorRoles := make(map[Role]struct{}) @@ -288,7 +288,7 @@ func (d *Discovery) getNamespaces() []string { func New(l *slog.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Discovery, error) { m, ok := metrics.(*kubernetesMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if l == nil { @@ -672,7 +672,7 @@ func (d *Discovery) newPodsByNodeInformer(plw *cache.ListWatch) cache.SharedInde indexers[nodeIndex] = func(obj interface{}) ([]string, error) { pod, ok := obj.(*apiv1.Pod) if !ok { - return nil, fmt.Errorf("object is not a pod") + return nil, errors.New("object is not a pod") } return []string{pod.Spec.NodeName}, nil } @@ -686,7 +686,7 @@ func (d *Discovery) newEndpointsByNodeInformer(plw *cache.ListWatch) cache.Share indexers[podIndex] = func(obj interface{}) ([]string, error) { e, ok := obj.(*apiv1.Endpoints) if !ok { - return nil, fmt.Errorf("object is not endpoints") + return nil, errors.New("object is not endpoints") } var pods []string for _, target := range e.Subsets { @@ -705,7 +705,7 @@ func (d *Discovery) newEndpointsByNodeInformer(plw *cache.ListWatch) cache.Share indexers[nodeIndex] = func(obj interface{}) ([]string, error) { e, ok := obj.(*apiv1.Endpoints) if !ok { - return nil, fmt.Errorf("object is not endpoints") + return nil, errors.New("object is not endpoints") } var nodes []string for _, target := range e.Subsets { @@ -751,7 +751,7 @@ func (d *Discovery) newEndpointSlicesByNodeInformer(plw *cache.ListWatch, object } } default: - return nil, fmt.Errorf("object is not an endpointslice") + return nil, errors.New("object is not an endpointslice") } return nodes, nil @@ -804,3 +804,13 @@ func addObjectMetaLabels(labelSet model.LabelSet, objectMeta metav1.ObjectMeta, func namespacedName(namespace, name string) string { return namespace + "/" + name } + +// nodeName knows how to handle the cache.DeletedFinalStateUnknown tombstone. +// It assumes the MetaNamespaceKeyFunc keyFunc is used, which uses the node name as the tombstone key. +func nodeName(o interface{}) (string, error) { + key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(o) + if err != nil { + return "", err + } + return key, nil +} diff --git a/discovery/kubernetes/kubernetes_test.go b/discovery/kubernetes/kubernetes_test.go index fbbd77c3c..a14f2b3d1 100644 --- a/discovery/kubernetes/kubernetes_test.go +++ b/discovery/kubernetes/kubernetes_test.go @@ -23,7 +23,9 @@ import ( prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" + apiv1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/version" "k8s.io/apimachinery/pkg/watch" @@ -320,3 +322,18 @@ func TestFailuresCountMetric(t *testing.T) { }) } } + +func TestNodeName(t *testing.T) { + node := &apiv1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + }, + } + name, err := nodeName(node) + require.NoError(t, err) + require.Equal(t, "foo", name) + + name, err = nodeName(cache.DeletedFinalStateUnknown{Key: "bar"}) + require.NoError(t, err) + require.Equal(t, "bar", name) +} diff --git a/discovery/kubernetes/node.go b/discovery/kubernetes/node.go index eecb52ab5..0e0c5745f 100644 --- a/discovery/kubernetes/node.go +++ b/discovery/kubernetes/node.go @@ -82,7 +82,7 @@ func NewNode(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.Co } func (n *Node) enqueue(obj interface{}) { - key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) + key, err := nodeName(obj) if err != nil { return } diff --git a/discovery/kubernetes/pod.go b/discovery/kubernetes/pod.go index 73568e51c..8704a6623 100644 --- a/discovery/kubernetes/pod.go +++ b/discovery/kubernetes/pod.go @@ -95,8 +95,11 @@ func NewPod(l *slog.Logger, pods cache.SharedIndexInformer, nodes cache.SharedIn p.enqueuePodsForNode(node.Name) }, DeleteFunc: func(o interface{}) { - node := o.(*apiv1.Node) - p.enqueuePodsForNode(node.Name) + nodeName, err := nodeName(o) + if err != nil { + l.Error("Error getting Node name", "err", err) + } + p.enqueuePodsForNode(nodeName) }, }) if err != nil { diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go index dfc12417c..90a91fc92 100644 --- a/discovery/linode/linode.go +++ b/discovery/linode/linode.go @@ -141,7 +141,7 @@ type Discovery struct { func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*linodeMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &Discovery{ diff --git a/discovery/manager_test.go b/discovery/manager_test.go index b882c0b02..0ff82d541 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -15,6 +15,7 @@ package discovery import ( "context" + "errors" "fmt" "sort" "strconv" @@ -1209,9 +1210,9 @@ func TestGaugeFailedConfigs(t *testing.T) { c := map[string]Configs{ "prometheus": { - errorConfig{fmt.Errorf("tests error 0")}, - errorConfig{fmt.Errorf("tests error 1")}, - errorConfig{fmt.Errorf("tests error 2")}, + errorConfig{errors.New("tests error 0")}, + errorConfig{errors.New("tests error 1")}, + errorConfig{errors.New("tests error 2")}, }, } discoveryManager.ApplyConfig(c) diff --git a/discovery/marathon/marathon.go b/discovery/marathon/marathon.go index f81a4410e..9c93e43f5 100644 --- a/discovery/marathon/marathon.go +++ b/discovery/marathon/marathon.go @@ -143,7 +143,7 @@ type Discovery struct { func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*marathonMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "marathon_sd") diff --git a/discovery/moby/docker.go b/discovery/moby/docker.go index 1a732c050..13cf20d6d 100644 --- a/discovery/moby/docker.go +++ b/discovery/moby/docker.go @@ -15,6 +15,7 @@ package moby import ( "context" + "errors" "fmt" "log/slog" "net" @@ -110,7 +111,7 @@ func (c *DockerSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error return err } if c.Host == "" { - return fmt.Errorf("host missing") + return errors.New("host missing") } if _, err = url.Parse(c.Host); err != nil { return err @@ -131,7 +132,7 @@ type DockerDiscovery struct { func NewDockerDiscovery(conf *DockerSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*DockerDiscovery, error) { m, ok := metrics.(*dockerMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &DockerDiscovery{ diff --git a/discovery/moby/dockerswarm.go b/discovery/moby/dockerswarm.go index 9e93e581f..ba4225341 100644 --- a/discovery/moby/dockerswarm.go +++ b/discovery/moby/dockerswarm.go @@ -15,6 +15,7 @@ package moby import ( "context" + "errors" "fmt" "log/slog" "net/http" @@ -99,7 +100,7 @@ func (c *DockerSwarmSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) e return err } if c.Host == "" { - return fmt.Errorf("host missing") + return errors.New("host missing") } if _, err = url.Parse(c.Host); err != nil { return err @@ -107,7 +108,7 @@ func (c *DockerSwarmSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) e switch c.Role { case "services", "nodes", "tasks": case "": - return fmt.Errorf("role missing (one of: tasks, services, nodes)") + return errors.New("role missing (one of: tasks, services, nodes)") default: return fmt.Errorf("invalid role %s, expected tasks, services, or nodes", c.Role) } @@ -128,7 +129,7 @@ type Discovery struct { func NewDiscovery(conf *DockerSwarmSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*dockerswarmMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &Discovery{ diff --git a/discovery/nomad/nomad.go b/discovery/nomad/nomad.go index 1dbd8f160..751630802 100644 --- a/discovery/nomad/nomad.go +++ b/discovery/nomad/nomad.go @@ -124,7 +124,7 @@ type Discovery struct { func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*nomadMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &Discovery{ diff --git a/discovery/openstack/openstack.go b/discovery/openstack/openstack.go index fa7e0cce9..cd0bcc126 100644 --- a/discovery/openstack/openstack.go +++ b/discovery/openstack/openstack.go @@ -145,7 +145,7 @@ type refresher interface { func NewDiscovery(conf *SDConfig, l *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*openstackMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } r, err := newRefresher(conf, l) diff --git a/discovery/ovhcloud/ovhcloud.go b/discovery/ovhcloud/ovhcloud.go index 08ed70296..a75e9694f 100644 --- a/discovery/ovhcloud/ovhcloud.go +++ b/discovery/ovhcloud/ovhcloud.go @@ -151,7 +151,7 @@ func newRefresher(conf *SDConfig, logger *slog.Logger) (refresher, error) { func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*ovhcloudMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } r, err := newRefresher(conf, logger) diff --git a/discovery/puppetdb/puppetdb.go b/discovery/puppetdb/puppetdb.go index 6122a76da..561bf78ba 100644 --- a/discovery/puppetdb/puppetdb.go +++ b/discovery/puppetdb/puppetdb.go @@ -17,6 +17,7 @@ import ( "bytes" "context" "encoding/json" + "errors" "fmt" "io" "log/slog" @@ -109,20 +110,20 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { return err } if c.URL == "" { - return fmt.Errorf("URL is missing") + return errors.New("URL is missing") } parsedURL, err := url.Parse(c.URL) if err != nil { return err } if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" { - return fmt.Errorf("URL scheme must be 'http' or 'https'") + return errors.New("URL scheme must be 'http' or 'https'") } if parsedURL.Host == "" { - return fmt.Errorf("host is missing in URL") + return errors.New("host is missing in URL") } if c.Query == "" { - return fmt.Errorf("query missing") + return errors.New("query missing") } return c.HTTPClientConfig.Validate() } @@ -142,7 +143,7 @@ type Discovery struct { func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*puppetdbMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { diff --git a/discovery/refresh/refresh_test.go b/discovery/refresh/refresh_test.go index b70a32635..0d4460ffa 100644 --- a/discovery/refresh/refresh_test.go +++ b/discovery/refresh/refresh_test.go @@ -15,7 +15,7 @@ package refresh import ( "context" - "fmt" + "errors" "testing" "time" @@ -64,7 +64,7 @@ func TestRefresh(t *testing.T) { case 2: return tg2, nil } - return nil, fmt.Errorf("some error") + return nil, errors.New("some error") } interval := time.Millisecond diff --git a/discovery/registry.go b/discovery/registry.go index 1f491d4ca..2401d78fb 100644 --- a/discovery/registry.go +++ b/discovery/registry.go @@ -267,7 +267,7 @@ func replaceYAMLTypeError(err error, oldTyp, newTyp reflect.Type) error { func RegisterSDMetrics(registerer prometheus.Registerer, rmm RefreshMetricsManager) (map[string]DiscovererMetrics, error) { err := rmm.Register() if err != nil { - return nil, fmt.Errorf("failed to create service discovery refresh metrics") + return nil, errors.New("failed to create service discovery refresh metrics") } metrics := make(map[string]DiscovererMetrics) @@ -275,7 +275,7 @@ func RegisterSDMetrics(registerer prometheus.Registerer, rmm RefreshMetricsManag currentSdMetrics := conf.NewDiscovererMetrics(registerer, rmm) err = currentSdMetrics.Register() if err != nil { - return nil, fmt.Errorf("failed to create service discovery metrics") + return nil, errors.New("failed to create service discovery metrics") } metrics[conf.Name()] = currentSdMetrics } diff --git a/discovery/scaleway/scaleway.go b/discovery/scaleway/scaleway.go index 670e439c4..bc9282fea 100644 --- a/discovery/scaleway/scaleway.go +++ b/discovery/scaleway/scaleway.go @@ -188,7 +188,7 @@ type Discovery struct{} func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*scalewayMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } r, err := newRefresher(conf) diff --git a/discovery/triton/triton.go b/discovery/triton/triton.go index 7b3b18f47..5ec7b6521 100644 --- a/discovery/triton/triton.go +++ b/discovery/triton/triton.go @@ -149,7 +149,7 @@ type Discovery struct { func New(logger *slog.Logger, conf *SDConfig, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*tritonMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } tls, err := config.NewTLSConfig(&conf.TLSConfig) diff --git a/discovery/uyuni/uyuni.go b/discovery/uyuni/uyuni.go index de806895d..1bd0cd2d4 100644 --- a/discovery/uyuni/uyuni.go +++ b/discovery/uyuni/uyuni.go @@ -215,7 +215,7 @@ func getEndpointInfoForSystems( func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*uyuniMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } apiURL, err := url.Parse(conf.Server) diff --git a/discovery/vultr/vultr.go b/discovery/vultr/vultr.go index f82b22168..ee92f0169 100644 --- a/discovery/vultr/vultr.go +++ b/discovery/vultr/vultr.go @@ -15,6 +15,7 @@ package vultr import ( "context" + "errors" "fmt" "log/slog" "net" @@ -117,7 +118,7 @@ type Discovery struct { func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*vultrMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &Discovery{ diff --git a/discovery/xds/kuma.go b/discovery/xds/kuma.go index 55b3d628e..6208e6182 100644 --- a/discovery/xds/kuma.go +++ b/discovery/xds/kuma.go @@ -14,6 +14,7 @@ package xds import ( + "errors" "fmt" "log/slog" "net/url" @@ -161,7 +162,7 @@ func kumaMadsV1ResourceParser(resources []*anypb.Any, typeURL string) ([]model.L func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (discovery.Discoverer, error) { m, ok := metrics.(*xdsMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } // Default to "prometheus" if hostname is unavailable. diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index a179a2f9f..9f5400c41 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -17,6 +17,7 @@ The Prometheus monitoring server | --config.file | Prometheus configuration file path. | `prometheus.yml` | | --config.auto-reload-interval | Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes. | `30s` | | --web.listen-address ... | Address to listen on for UI, API, and telemetry. Can be repeated. | `0.0.0.0:9090` | +| --auto-gomemlimit | Automatically set GOMEMLIMIT to match Linux container or system memory limit | `true` | | --auto-gomemlimit.ratio | The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory | `0.9` | | --web.config.file | [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. | | | --web.read-timeout | Maximum duration before timing out read of the request, and closing idle connections. | `5m` | @@ -58,7 +59,7 @@ The Prometheus monitoring server | --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` | | --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` | | --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` | -| --enable-feature ... | Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +| --enable-feature ... | Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | | --agent | Run Prometheus in 'Agent mode'. | | | --log.level | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` | | --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` | diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index 996a99655..5e2a8f6bb 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -462,6 +462,7 @@ Unit tests for rules. | Flag | Description | Default | | --- | --- | --- | | --run ... | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | | +| --debug | Enable unit test debugging. | `false` | | --diff | [Experimental] Print colored differential output between expected & received output. | `false` | diff --git a/docs/configuration/alerting_rules.md b/docs/configuration/alerting_rules.md index 3c1ec84f0..cd33dba8e 100644 --- a/docs/configuration/alerting_rules.md +++ b/docs/configuration/alerting_rules.md @@ -21,10 +21,13 @@ An example rules file with an alert would be: ```yaml groups: - name: example + labels: + team: myteam rules: - alert: HighRequestLatency expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5 for: 10m + keep_firing_for: 5m labels: severity: page annotations: @@ -38,6 +41,13 @@ the alert continues to be active during each evaluation for 10 minutes before firing the alert. Elements that are active, but not firing yet, are in the pending state. Alerting rules without the `for` clause will become active on the first evaluation. +There is also an optional `keep_firing_for` clause that tells Prometheus to keep +this alert firing for the specified duration after the firing condition was last met. +This can be used to prevent situations such as flapping alerts, false resolutions +due to lack of data loss, etc. Alerting rules without the `keep_firing_for` clause +will deactivate on the first evaluation where the condition is not met (assuming +any optional `for` duration desribed above has been satisfied). + The `labels` clause allows specifying a set of additional labels to be attached to the alert. Any existing conflicting labels will be overwritten. The label values can be templated. diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 46d325141..2093ed883 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -79,7 +79,11 @@ global: [ rule_query_offset: | default = 0s ] # The labels to add to any time series or alerts when communicating with - # external systems (federation, remote storage, Alertmanager). + # external systems (federation, remote storage, Alertmanager). + # Environment variable references `${var}` or `$var` are replaced according + # to the values of the current environment variables. + # References to undefined variables are replaced by the empty string. + # The `$` character can be escaped by using `$$`. external_labels: [ : ... ] @@ -208,12 +212,18 @@ job_name: # The protocols to negotiate during a scrape with the client. # Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, -# OpenMetricsText1.0.0, PrometheusText0.0.4. +# OpenMetricsText1.0.0, PrometheusText0.0.4, PrometheusText1.0.0. [ scrape_protocols: [, ...] | default = ] -# Whether to scrape a classic histogram that is also exposed as a native +# Fallback protocol to use if a scrape returns blank, unparseable, or otherwise +# invalid Content-Type. +# Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, +# OpenMetricsText1.0.0, PrometheusText0.0.4, PrometheusText1.0.0. +[ fallback_scrape_protocol: ] + +# Whether to scrape a classic histogram, even if it is also exposed as a native # histogram (has no effect without --enable-feature=native-histograms). -[ scrape_classic_histograms: | default = false ] +[ always_scrape_classic_histograms: | default = false ] # The HTTP resource path on which to fetch metrics from targets. [ metrics_path: | default = /metrics ] @@ -2879,6 +2889,7 @@ metadata_config: # HTTP client settings, including authentication methods (such as basic auth and # authorization), proxy configurations, TLS options, custom HTTP headers, etc. +# enable_http2 defaults to false for remote-write. [ ] ``` @@ -2930,8 +2941,6 @@ with this feature. `tsdb` lets you configure the runtime-reloadable configuration settings of the TSDB. -NOTE: Out-of-order ingestion is an experimental feature, but you do not need any additional flag to enable it. Setting `out_of_order_time_window` to a positive duration enables it. - ```yaml # Configures how old an out-of-order/out-of-bounds sample can be w.r.t. the TSDB max time. # An out-of-order/out-of-bounds sample is ingested into the TSDB as long as the timestamp diff --git a/docs/configuration/recording_rules.md b/docs/configuration/recording_rules.md index 9aa226bbc..9a8e7a70c 100644 --- a/docs/configuration/recording_rules.md +++ b/docs/configuration/recording_rules.md @@ -89,6 +89,11 @@ name: # Offset the rule evaluation timestamp of this particular group by the specified duration into the past. [ query_offset: | default = global.rule_query_offset ] +# Labels to add or overwrite before storing the result for its rules. +# Labels defined in will override the key if it has a collision. +labels: + [ : ] + rules: [ - ... ] ``` diff --git a/docs/feature_flags.md b/docs/feature_flags.md index a3e2c0b9e..0541961f2 100644 --- a/docs/feature_flags.md +++ b/docs/feature_flags.md @@ -11,15 +11,6 @@ Their behaviour can change in future releases which will be communicated via the You can enable them using the `--enable-feature` flag with a comma separated list of features. They may be enabled by default in future versions. -## Expand environment variables in external labels - -`--enable-feature=expand-external-labels` - -Replace `${var}` or `$var` in the [`external_labels`](configuration/configuration.md#configuration-file) -values according to the values of the current environment variables. References -to undefined variables are replaced by the empty string. -The `$` character can be escaped by using `$$`. - ## Exemplars storage `--enable-feature=exemplar-storage` @@ -32,9 +23,8 @@ Exemplar storage is implemented as a fixed size circular buffer that stores exem `--enable-feature=memory-snapshot-on-shutdown` -This takes the snapshot of the chunks that are in memory along with the series information when shutting down and stores -it on disk. This will reduce the startup time since the memory state can be restored with this snapshot and m-mapped -chunks without the need of WAL replay. +This takes a snapshot of the chunks that are in memory along with the series information when shutting down and stores it on disk. This will reduce the startup time since the memory state can now be restored with this snapshot +and m-mapped chunks, while a WAL replay from disk is only needed for the parts of the WAL that are not part of the snapshot. ## Extra scrape metrics @@ -63,14 +53,6 @@ computed at all. When enabled, GOMAXPROCS variable is automatically set to match Linux container CPU quota. -## Auto GOMEMLIMIT - -`--enable-feature=auto-gomemlimit` - -When enabled, the GOMEMLIMIT variable is automatically set to match the Linux container memory limit. If there is no container limit, or the process is running outside of containers, the system memory total is used. - -There is also an additional tuning flag, `--auto-gomemlimit.ratio`, which allows controlling how much of the memory is used for Prometheus. The remainder is reserved for memory outside the process. For example, kernel page cache. Page cache is important for Prometheus TSDB query performance. The default is `0.9`, which means 90% of the memory limit will be used for Prometheus. - ## Native Histograms `--enable-feature=native-histograms` @@ -93,59 +75,7 @@ those classic histograms that do not come with a corresponding native histogram. However, if a native histogram is present, Prometheus will ignore the corresponding classic histogram, with the notable exception of exemplars, which are always ingested. To keep the classic histograms as well, enable -`scrape_classic_histograms` in the scrape job. - -_Note about the format of `le` and `quantile` label values:_ - -In certain situations, the protobuf parsing changes the number formatting of -the `le` labels of classic histograms and the `quantile` labels of -summaries. Typically, this happens if the scraped target is instrumented with -[client_golang](https://github.com/prometheus/client_golang) provided that -[promhttp.HandlerOpts.EnableOpenMetrics](https://pkg.go.dev/github.com/prometheus/client_golang/prometheus/promhttp#HandlerOpts) -is set to `false`. In such a case, integer label values are represented in the -text format as such, e.g. `quantile="1"` or `le="2"`. However, the protobuf parsing -changes the representation to float-like (following the OpenMetrics -specification), so the examples above become `quantile="1.0"` and `le="2.0"` after -ingestion into Prometheus, which changes the identity of the metric compared to -what was ingested before via the text format. - -The effect of this change is that alerts, recording rules and dashboards that -directly reference label values as whole numbers such as `le="1"` will stop -working. - -Aggregation by the `le` and `quantile` labels for vectors that contain the old and -new formatting will lead to unexpected results, and range vectors that span the -transition between the different formatting will contain additional series. -The most common use case for both is the quantile calculation via -`histogram_quantile`, e.g. -`histogram_quantile(0.95, sum by (le) (rate(histogram_bucket[10m])))`. -The `histogram_quantile` function already tries to mitigate the effects to some -extent, but there will be inaccuracies, in particular for shorter ranges that -cover only a few samples. - -Ways to deal with this change either globally or on a per metric basis: - -- Fix references to integer `le`, `quantile` label values, but otherwise do -nothing and accept that some queries that span the transition time will produce -inaccurate or unexpected results. -_This is the recommended solution, to get consistently normalized label values._ -Also Prometheus 3.0 is expected to enforce normalization of these label values. -- Use `metric_relabel_config` to retain the old labels when scraping targets. -This should **only** be applied to metrics that currently produce such labels. - - -```yaml - metric_relabel_configs: - - source_labels: - - quantile - target_label: quantile - regex: (\d+)\.0+ - - source_labels: - - le - - __name__ - target_label: le - regex: (\d+)\.0+;.*_bucket -``` +`always_scrape_classic_histograms` in the scrape job. ## Experimental PromQL functions diff --git a/docs/migration.md b/docs/migration.md index cb88bbfd6..43fc43df2 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -3,198 +3,198 @@ title: Migration sort_rank: 10 --- -# Prometheus 2.0 migration guide +# Prometheus 3.0 migration guide -In line with our [stability promise](https://prometheus.io/blog/2016/07/18/prometheus-1-0-released/#fine-print), -the Prometheus 2.0 release contains a number of backwards incompatible changes. -This document offers guidance on migrating from Prometheus 1.8 to Prometheus 2.0 and newer versions. +In line with our [stability promise](https://prometheus.io/docs/prometheus/latest/stability/), +the Prometheus 3.0 release contains a number of backwards incompatible changes. +This document offers guidance on migrating from Prometheus 2.x to Prometheus 3.0 and newer versions. ## Flags -The format of Prometheus command line flags has changed. Instead of a -single dash, all flags now use a double dash. Common flags (`--config.file`, -`--web.listen-address` and `--web.external-url`) remain but -almost all storage-related flags have been removed. +- The following feature flags have been removed and they have been added to the + default behavior of Prometheus v3: + - `promql-at-modifier` + - `promql-negative-offset` + - `remote-write-receiver` + - `new-service-discovery-manager` + - `expand-external-labels` + Environment variable references `${var}` or `$var` in external label values + are replaced according to the values of the current environment variables. + References to undefined variables are replaced by the empty string. + The `$` character can be escaped by using `$$`. + - `no-default-scrape-port` + Prometheus v3 will no longer add ports to scrape targets according to the + specified scheme. Target will now appear in labels as configured. + If you rely on scrape targets like + `https://example.com/metrics` or `http://exmaple.com/metrics` to be + represented as `https://example.com/metrics:443` and + `http://example.com/metrics:80` respectively, add them to your target URLs + - `agent` + Instead use the dedicated `--agent` cli flag. -Some notable flags which have been removed: + Prometheus v3 will log a warning if you continue to pass these to + `--enable-feature`. -- `-alertmanager.url` In Prometheus 2.0, the command line flags for configuring - a static Alertmanager URL have been removed. Alertmanager must now be - discovered via service discovery, see [Alertmanager service discovery](#alertmanager-service-discovery). +## Configuration -- `-log.format` In Prometheus 2.0 logs can only be streamed to standard error. - -- `-query.staleness-delta` has been renamed to `--query.lookback-delta`; Prometheus - 2.0 introduces a new mechanism for handling staleness, see [staleness](querying/basics.md#staleness). - -- `-storage.local.*` Prometheus 2.0 introduces a new storage engine; as such all - flags relating to the old engine have been removed. For information on the - new engine, see [Storage](#storage). - -- `-storage.remote.*` Prometheus 2.0 has removed the deprecated remote - storage flags, and will fail to start if they are supplied. To write to - InfluxDB, Graphite, or OpenTSDB use the relevant storage adapter. - -## Alertmanager service discovery - -Alertmanager service discovery was introduced in Prometheus 1.4, allowing Prometheus -to dynamically discover Alertmanager replicas using the same mechanism as scrape -targets. In Prometheus 2.0, the command line flags for static Alertmanager config -have been removed, so the following command line flag: - -``` -./prometheus -alertmanager.url=http://alertmanager:9093/ -``` - -Would be replaced with the following in the `prometheus.yml` config file: - -```yaml -alerting: - alertmanagers: - - static_configs: - - targets: - - alertmanager:9093 -``` - -You can also use all the usual Prometheus service discovery integrations and -relabeling in your Alertmanager configuration. This snippet instructs -Prometheus to search for Kubernetes pods, in the `default` namespace, with the -label `name: alertmanager` and with a non-empty port. - -```yaml -alerting: - alertmanagers: - - kubernetes_sd_configs: - - role: pod - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - relabel_configs: - - source_labels: [__meta_kubernetes_pod_label_name] - regex: alertmanager - action: keep - - source_labels: [__meta_kubernetes_namespace] - regex: default - action: keep - - source_labels: [__meta_kubernetes_pod_container_port_number] - regex: - action: drop -``` - -## Recording rules and alerts - -The format for configuring alerting and recording rules has been changed to YAML. -An example of a recording rule and alert in the old format: - -``` -job:request_duration_seconds:histogram_quantile99 = - histogram_quantile(0.99, sum by (le, job) (rate(request_duration_seconds_bucket[1m]))) - -ALERT FrontendRequestLatency - IF job:request_duration_seconds:histogram_quantile99{job="frontend"} > 0.1 - FOR 5m - ANNOTATIONS { - summary = "High frontend request latency", - } -``` - -Would look like this: - -```yaml -groups: -- name: example.rules - rules: - - record: job:request_duration_seconds:histogram_quantile99 - expr: histogram_quantile(0.99, sum by (le, job) (rate(request_duration_seconds_bucket[1m]))) - - alert: FrontendRequestLatency - expr: job:request_duration_seconds:histogram_quantile99{job="frontend"} > 0.1 - for: 5m - annotations: - summary: High frontend request latency -``` - -To help with the change, the `promtool` tool has a mode to automate the rules conversion. Given a `.rules` file, it will output a `.rules.yml` file in the -new format. For example: - -``` -$ promtool update rules example.rules -``` - -You will need to use `promtool` from [Prometheus 2.5](https://github.com/prometheus/prometheus/releases/tag/v2.5.0) as later versions no longer contain the above subcommand. - -## Storage - -The data format in Prometheus 2.0 has completely changed and is not backwards -compatible with 1.8 and older versions. To retain access to your historic monitoring data we -recommend you run a non-scraping Prometheus instance running at least version -1.8.1 in parallel with your Prometheus 2.0 instance, and have the new server -read existing data from the old one via the remote read protocol. - -Your Prometheus 1.8 instance should be started with the following flags and an -config file containing only the `external_labels` setting (if any): - -``` -$ ./prometheus-1.8.1.linux-amd64/prometheus -web.listen-address ":9094" -config.file old.yml -``` - -Prometheus 2.0 can then be started (on the same machine) with the following flags: - -``` -$ ./prometheus-2.0.0.linux-amd64/prometheus --config.file prometheus.yml -``` - -Where `prometheus.yml` contains in addition to your full existing configuration, the stanza: - -```yaml -remote_read: - - url: "http://localhost:9094/api/v1/read" -``` +- The scrape job level configuration option `scrape_classic_histograms` has been + renamed to `always_scrape_classic_histograms`. If you use the + `--enable-feature=native-histograms` feature flag to ingest native histograms + and you also want to ingest classic histograms that an endpoint might expose + along with native histograms, be sure to add this configuration or change your + configuration from the old name. +- The `http_config.enable_http2` in `remote_write` items default has been + changed to `false`. In Prometheus v2 the remote write http client would + default to use http2. In order to parallelize multiple remote write queues + across multiple sockets its preferable to not default to http2. + If you prefer to use http2 for remote write you must now set + `http_config.enable_http2: true` in your `remote_write` configuration section. ## PromQL -The following features have been removed from PromQL: +- The `.` pattern in regular expressions in PromQL matches newline characters. + With this change a regular expressions like `.*` matches strings that include + `\n`. This applies to matchers in queries and relabel configs. For example the + following regular expressions now match the accompanying strings, wheras in + Prometheus v2 these combinations didn't match. -- `drop_common_labels` function - the `without` aggregation modifier should be used - instead. -- `keep_common` aggregation modifier - the `by` modifier should be used instead. -- `count_scalar` function - use cases are better handled by `absent()` or correct - propagation of labels in operations. +| Regex | Additional matches | +| ----- | ------ | +| ".*" | "foo\n", "Foo\nBar" | +| "foo.?bar" | "foo\nbar" | +| "foo.+bar" | "foo\nbar" | -See [issue #3060](https://github.com/prometheus/prometheus/issues/3060) for more -details. + If you want Prometheus v3 to behave like v2 did, you will have to change your + regular expressions by replacing all `.` patterns with `[^\n]`, e.g. + `foo[^\n]*`. +- Lookback and range selectors are left open and right closed (previously left + closed and right closed). This change affects queries when the evaluation time + perfectly aligns with the sample timestamps. For example assume querying a + timeseries with even spaced samples exactly 1 minute apart. Before Prometheus + 3.x, range query with `5m` will mostly return 5 samples. But if the query + evaluation aligns perfectly with a scrape, it would return 6 samples. In + Prometheus 3.x queries like this will always return 5 samples. + This change has likely few effects for everyday use, except for some sub query + use cases. + Query front-ends that align queries usually align sub-queries to multiples of + the step size. These sub queries will likely be affected. + Tests are more likely to affected. To fix those either adjust the expected + number of samples or extend to range by less then one sample interval. +- The `holt_winters` function has been renamed to `double_exponential_smoothing` + and is now guarded by the `promql-experimental-functions` feature flag. + If you want to keep using holt_winters, you have to do both of these things: + - Rename holt_winters to double_exponential_smoothing in your queries. + - Pass `--enable-feature=promql-experimental-functions` in your Prometheus + cli invocation.. + +## Scrape protocols +Prometheus v3 is more strict concerning the Content-Type header received when +scraping. Prometheus v2 would default to the standard Prometheus text protocol +if the target being scraped did not specify a Content-Type header or if the +header was unparsable or unrecognised. This could lead to incorrect data being +parsed in the scrape. Prometheus v3 will now fail the scrape in such cases. + +If a scrape target is not providing the correct Content-Type header the +fallback protocol can be specified using the fallback_scrape_protocol +parameter. See [Prometheus scrape_config documentation.](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) + +This is a breaking change as scrapes that may have succeeded with Prometheus v2 +may now fail if this fallback protocol is not specified. ## Miscellaneous -### Prometheus non-root user +### TSDB format and downgrade +The TSDB format has been changed in Prometheus v2.55 in preparation for changes +to the index format. Consequently a Prometheus v3 tsdb can only be read by a +Prometheus v2.55 or newer. +Before upgrading to Prometheus v3 please upgrade to v2.55 first and confirm +Prometheus works as expected. Only then continue with the upgrade to v3. -The Prometheus Docker image is now built to [run Prometheus -as a non-root user](https://github.com/prometheus/prometheus/pull/2859). If you -want the Prometheus UI/API to listen on a low port number (say, port 80), you'll -need to override it. For Kubernetes, you would use the following YAML: +### TSDB Storage contract +TSDB compatible storage is now expected to return results matching the specified +selectors. This might impact some third party implementations, most likely +implementing `remote_read`. +This contract is not explicitly enforced, but can cause undefined behavior. + +### UTF-8 names +Prometheus v3 supports UTF-8 in metric and label names. This means metric and +label names can change after upgrading according to what is exposed by +endpoints. Furthermore, metric and label names that would have previously been +flagged as invalid no longer will be. + +Users wishing to preserve the original validation behavior can update their +prometheus yaml configuration to specify the legacy validation scheme: + +``` +global: + metric_name_validation_scheme: legacy +``` + +Or on a per-scrape basis: + +``` +scrape_configs: + - job_name: job1 + metric_name_validation_scheme: utf8 + - job_name: job2 + metric_name_validation_scheme: legacy +``` + +### Log message format +Prometheus v3 has adopted `log/slog` over the previous `go-kit/log`. This +results in a change of log message format. An example of the old log format is: +``` +ts=2024-10-23T22:01:06.074Z caller=main.go:627 level=info msg="No time or size retention was set so using the default time retention" duration=15d +ts=2024-10-23T22:01:06.074Z caller=main.go:671 level=info msg="Starting Prometheus Server" mode=server version="(version=, branch=, revision=91d80252c3e528728b0f88d254dd720f6be07cb8-modified)" +ts=2024-10-23T22:01:06.074Z caller=main.go:676 level=info build_context="(go=go1.23.0, platform=linux/amd64, user=, date=, tags=unknown)" +ts=2024-10-23T22:01:06.074Z caller=main.go:677 level=info host_details="(Linux 5.15.0-124-generic #134-Ubuntu SMP Fri Sep 27 20:20:17 UTC 2024 x86_64 gigafips (none))" +``` + +a similar sequence in the new log format looks like this: +``` +time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:640 msg="No time or size retention was set so using the default time retention" duration=15d +time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:681 msg="Starting Prometheus Server" mode=server version="(version=, branch=, revision=7c7116fea8343795cae6da42960cacd0207a2af8)" +time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:686 msg="operational information" build_context="(go=go1.23.0, platform=linux/amd64, user=, date=, tags=unknown)" host_details="(Linux 5.15.0-124-generic #134-Ubuntu SMP Fri Sep 27 20:20:17 UTC 2024 x86_64 gigafips (none))" fd_limits="(soft=1048576, hard=1048576)" vm_limits="(soft=unlimited, hard=unlimited)" +``` + +### `le` and `quantile` label values +In Prometheus v3, the values of the `le` label of classic histograms and the +`quantile` label of summaries are normalized upon ingestions. In Prometheus v2 +the value of these labels depended on the scrape protocol (protobuf vs text +format) in some situations. This led to label values changing based on the +scrape protocol. E.g. a metric exposed as `my_classic_hist{le="1"}` would be +ingested as `my_classic_hist{le="1"}` via the text format, but as +`my_classic_hist{le="1.0"}` via protobuf. This changed the identity of the +metric and caused problems when querying the metric. +In Prometheus v3 these label values will always be normalized to a float like +representation. I.e. the above example will always result in +`my_classic_hist{le="1.0"}` being ingested into prometheus, no matter via which +protocol. The effect of this change is that alerts, recording rules and +dashboards that directly reference label values as whole numbers such as +`le="1"` will stop working. + +Ways to deal with this change either globally or on a per metric basis: + +- Fix references to integer `le`, `quantile` label values, but otherwise do +nothing and accept that some queries that span the transition time will produce +inaccurate or unexpected results. +_This is the recommended solution._ +- Use `metric_relabel_config` to retain the old labels when scraping targets. +This should **only** be applied to metrics that currently produce such labels. ```yaml -apiVersion: v1 -kind: Pod -metadata: - name: security-context-demo-2 -spec: - securityContext: - runAsUser: 0 -... + metric_relabel_configs: + - source_labels: + - quantile + target_label: quantile + regex: (\d+)\.0+ + - source_labels: + - le + - __name__ + target_label: le + regex: (\d+)\.0+;.*_bucket ``` -See [Configure a Security Context for a Pod or Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) -for more details. +# Prometheus 2.0 migration guide -If you're using Docker, then the following snippet would be used: - -``` -docker run -p 9090:9090 prom/prometheus:latest -``` - -### Prometheus lifecycle - -If you use the Prometheus `/-/reload` HTTP endpoint to [automatically reload your -Prometheus config when it changes](configuration/configuration.md), -these endpoints are disabled by default for security reasons in Prometheus 2.0. -To enable them, set the `--web.enable-lifecycle` flag. +For the Prometheus 1.8 to 2.0 please refer to the [Prometheus v2.55 documentation](https://prometheus.io/docs/prometheus/2.55/migration/). diff --git a/docs/querying/api.md b/docs/querying/api.md index 1095171b2..0352496f1 100644 --- a/docs/querying/api.md +++ b/docs/querying/api.md @@ -568,7 +568,7 @@ Instant vectors are returned as result type `vector`. The corresponding Each series could have the `"value"` key, or the `"histogram"` key, but not both. Series are not guaranteed to be returned in any particular order unless a function -such as [`sort`](functions.md#sort) or [`sort_by_label`](functions.md#sort_by_label)` +such as [`sort`](functions.md#sort) or [`sort_by_label`](functions.md#sort_by_label) is used. ### Scalars @@ -764,6 +764,8 @@ URL query parameters: - `file[]=`: only return rules with the given filepath. If the parameter is repeated, rules with any of the provided filepaths are returned. When the parameter is absent or empty, no filtering is done. - `exclude_alerts=`: only return rules, do not return active alerts. - `match[]=`: only return rules that have configured labels that satisfy the label selectors. If the parameter is repeated, rules that match any of the sets of label selectors are returned. Note that matching is on the labels in the definition of each rule, not on the values after template expansion (for alerting rules). Optional. +- `group_limit=`: The `group_limit` parameter allows you to specify a limit for the number of rule groups that is returned in a single response. If the total number of rule groups exceeds the specified `group_limit` value, the response will include a `groupNextToken` property. You can use the value of this `groupNextToken` property in subsequent requests in the `group_next_token` parameter to paginate over the remaining rule groups. The `groupNextToken` property will not be present in the final response, indicating that you have retrieved all the available rule groups. Please note that there are no guarantees regarding the consistency of the response if the rule groups are being modified during the pagination process. +- `group_next_token`: the pagination token that was returned in previous request when the `group_limit` property is set. The pagination token is used to iteratively paginate over a large number of rule groups. To use the `group_next_token` parameter, the `group_limit` parameter also need to be present. If a rule group that coincides with the next token is removed while you are paginating over the rule groups, a response with status code 400 will be returned. ```json $ curl http://localhost:9090/api/v1/rules @@ -903,7 +905,7 @@ curl -G http://localhost:9091/api/v1/targets/metadata \ ``` The following example returns metadata for all metrics for all targets with -label `instance="127.0.0.1:9090`. +label `instance="127.0.0.1:9090"`. ```json curl -G http://localhost:9091/api/v1/targets/metadata \ @@ -1188,9 +1190,11 @@ The following endpoint returns various cardinality statistics about the Promethe GET /api/v1/status/tsdb ``` URL query parameters: + - `limit=`: Limit the number of returned items to a given number for each set of statistics. By default, 10 items are returned. -The `data` section of the query result consists of +The `data` section of the query result consists of: + - **headStats**: This provides the following data about the head block of the TSDB: - **numSeries**: The number of series. - **chunkCount**: The number of chunks. @@ -1266,13 +1270,13 @@ The following endpoint returns information about the WAL replay: GET /api/v1/status/walreplay ``` -**read**: The number of segments replayed so far. -**total**: The total number segments needed to be replayed. -**progress**: The progress of the replay (0 - 100%). -**state**: The state of the replay. Possible states: -- **waiting**: Waiting for the replay to start. -- **in progress**: The replay is in progress. -- **done**: The replay has finished. +- **read**: The number of segments replayed so far. +- **total**: The total number segments needed to be replayed. +- **progress**: The progress of the replay (0 - 100%). +- **state**: The state of the replay. Possible states: + - **waiting**: Waiting for the replay to start. + - **in progress**: The replay is in progress. + - **done**: The replay has finished. ```json $ curl http://localhost:9090/api/v1/status/walreplay diff --git a/docs/querying/functions.md b/docs/querying/functions.md index b6f218180..310b7b933 100644 --- a/docs/querying/functions.md +++ b/docs/querying/functions.md @@ -464,6 +464,97 @@ by the number of seconds under the specified time range window, and should be used primarily for human readability. Use `rate` in recording rules so that increases are tracked consistently on a per-second basis. +## `info()` (experimental) + +_The `info` function is an experiment to improve UX +around including labels from [info metrics](https://grafana.com/blog/2021/08/04/how-to-use-promql-joins-for-more-effective-queries-of-prometheus-metrics-at-scale/#info-metrics). +The behavior of this function may change in future versions of Prometheus, +including its removal from PromQL. `info` has to be enabled via the +[feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`._ + +`info(v instant-vector, [data-label-selector instant-vector])` finds, for each time +series in `v`, all info series with matching _identifying_ labels (more on +this later), and adds the union of their _data_ (i.e., non-identifying) labels +to the time series. The second argument `data-label-selector` is optional. +It is not a real instant vector, but uses a subset of its syntax. +It must start and end with curly braces (`{ ... }`) and may only contain label matchers. +The label matchers are used to constrain which info series to consider +and which data labels to add to `v`. + +Identifying labels of an info series are the subset of labels that uniquely +identify the info series. The remaining labels are considered +_data labels_ (also called non-identifying). (Note that Prometheus's concept +of time series identity always includes _all_ the labels. For the sake of the `info` +function, we “logically” define info series identity in a different way than +in the conventional Prometheus view.) The identifying labels of an info series +are used to join it to regular (non-info) series, i.e. those series that have +the same labels as the identifying labels of the info series. The data labels, which are +the ones added to the regular series by the `info` function, effectively encode +metadata key value pairs. (This implies that a change in the data labels +in the conventional Prometheus view constitutes the end of one info series and +the beginning of a new info series, while the “logical” view of the `info` function is +that the same info series continues to exist, just with different “data”.) + +The conventional approach of adding data labels is sometimes called a “join query”, +as illustrated by the following example: + +``` + rate(http_server_request_duration_seconds_count[2m]) +* on (job, instance) group_left (k8s_cluster_name) + target_info +``` + +The core of the query is the expression `rate(http_server_request_duration_seconds_count[2m])`. +But to add data labels from an info metric, the user has to use elaborate +(and not very obvious) syntax to specify which info metric to use (`target_info`), what the +identifying labels are (`on (job, instance)`), and which data labels to add +(`group_left (k8s_cluster_name)`). + +This query is not only verbose and hard to write, it might also run into an “identity crisis”: +If any of the data labels of `target_info` changes, Prometheus sees that as a change of series +(as alluded to above, Prometheus just has no native concept of non-identifying labels). +If the old `target_info` series is not properly marked as stale (which can happen with certain ingestion paths), +the query above will fail for up to 5m (the lookback delta) because it will find a conflicting +match with both the old and the new version of `target_info`. + +The `info` function not only resolves this conflict in favor of the newer series, it also simplifies the syntax +because it knows about the available info series and what their identifying labels are. The example query +looks like this with the `info` function: + +``` +info( + rate(http_server_request_duration_seconds_count[2m]), + {k8s_cluster_name=~".+"} +) +``` + +The common case of adding _all_ data labels can be achieved by +omitting the 2nd argument of the `info` function entirely, simplifying +the example even more: + +``` +info(rate(http_server_request_duration_seconds_count[2m])) +``` + +While `info` normally automatically finds all matching info series, it's possible to +restrict them by providing a `__name__` label matcher, e.g. +`{__name__="target_info"}`. + +### Limitations + +In its current iteration, `info` defaults to considering only info series with +the name `target_info`. It also assumes that the identifying info series labels are +`instance` and `job`. `info` does support other info series names however, through +`__name__` label matchers. E.g., one can explicitly say to consider both +`target_info` and `build_info` as follows: +`{__name__=~"(target|build)_info"}`. However, the identifying labels always +have to be `instance` and `job`. + +These limitations are partially defeating the purpose of the `info` function. +At the current stage, this is an experiment to find out how useful the approach +turns out to be in practice. A final version of the `info` function will indeed +consider all matching info series and with their appropriate identifying labels. + ## `irate()` `irate(v range-vector)` calculates the per-second instant rate of increase of diff --git a/docs/stability.md b/docs/stability.md index 1fd2e51e0..cb30b8ad9 100644 --- a/docs/stability.md +++ b/docs/stability.md @@ -9,7 +9,7 @@ Prometheus promises API stability within a major version, and strives to avoid breaking changes for key features. Some features, which are cosmetic, still under development, or depend on 3rd party services, are not covered by this. -Things considered stable for 2.x: +Things considered stable for 3.x: * The query language and data model * Alerting and recording rules @@ -18,21 +18,25 @@ Things considered stable for 2.x: * Configuration file format (minus the service discovery remote read/write, see below) * Rule/alert file format * Console template syntax and semantics -* Remote write sending, per the [1.0 specification](https://prometheus.io/docs/concepts/remote_write_spec/). +* Remote write sending, per the [1.0 specification](https://prometheus.io/docs/concepts/remote_write_spec/) and receiving +* Agent mode +* OTLP receiver endpoint -Things considered unstable for 2.x: +Things considered unstable for 3.x: * Any feature listed as experimental or subject to change, including: - * The [`holt_winters` PromQL function](https://github.com/prometheus/prometheus/issues/2458) - * Remote write receiving, remote read and the remote read endpoint + * The [`double_exponential_smoothing` PromQL function](https://github.com/prometheus/prometheus/issues/2458) + * Remote read and the remote read endpoint * Server-side HTTPS and basic authentication -* Service discovery integrations, with the exception of `static_configs` and `file_sd_configs` +* Service discovery integrations, with the exception of `static_configs`, `file_sd_configs` and `http_sd_config` * Go APIs of packages that are part of the server * HTML generated by the web UI * The metrics in the /metrics endpoint of Prometheus itself * Exact on-disk format. Potential changes however, will be forward compatible and transparently handled by Prometheus * The format of the logs +Prometheus 2.x stability guarantees can be found [in the 2.x documentation](https://prometheus.io/docs/prometheus/2.55/stability/). + As long as you are not using any features marked as experimental/unstable, an upgrade within a major version can usually be performed without any operational adjustments and very little risk that anything will break. Any breaking changes diff --git a/documentation/examples/prometheus-otlp.yml b/documentation/examples/prometheus-otlp.yml new file mode 100644 index 000000000..f0a8ab8b1 --- /dev/null +++ b/documentation/examples/prometheus-otlp.yml @@ -0,0 +1,31 @@ +# my global config +global: + evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. + +otlp: + # Recommended attributes to be promoted to labels. + promote_resource_attributes: + - service.instance.id + - service.name + - service.namespace + - cloud.availability_zone + - cloud.region + - container.name + - deployment.environment.name + - k8s.cluster.name + - k8s.container.name + - k8s.cronjob.name + - k8s.daemonset.name + - k8s.deployment.name + - k8s.job.name + - k8s.namespace.name + - k8s.pod.name + - k8s.replicaset.name + - k8s.statefulset.name + # Ingest OTLP data keeping UTF-8 characters in metric/label names. + translation_strategy: NoUTF8EscapingWithSuffixes + +storage: + # OTLP is a push-based protocol, Out of order samples is a common scenario. + tsdb: + out_of_order_time_window: 30m diff --git a/documentation/examples/remote_storage/go.mod b/documentation/examples/remote_storage/go.mod index 0aad43758..b6a56f9ef 100644 --- a/documentation/examples/remote_storage/go.mod +++ b/documentation/examples/remote_storage/go.mod @@ -6,9 +6,9 @@ require ( github.com/alecthomas/kingpin/v2 v2.4.0 github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 - github.com/influxdata/influxdb v1.11.6 - github.com/prometheus/client_golang v1.20.4 - github.com/prometheus/common v0.60.0 + github.com/influxdata/influxdb v1.11.7 + github.com/prometheus/client_golang v1.20.5 + github.com/prometheus/common v0.60.1 github.com/prometheus/prometheus v0.53.1 github.com/stretchr/testify v1.9.0 ) diff --git a/documentation/examples/remote_storage/go.sum b/documentation/examples/remote_storage/go.sum index 936b448d8..7af1984c6 100644 --- a/documentation/examples/remote_storage/go.sum +++ b/documentation/examples/remote_storage/go.sum @@ -166,8 +166,8 @@ github.com/hetznercloud/hcloud-go/v2 v2.9.0 h1:s0N6R7Zoi2DPfMtUF5o9VeUBzTtHVY6MI github.com/hetznercloud/hcloud-go/v2 v2.9.0/go.mod h1:qtW/TuU7Bs16ibXl/ktJarWqU2LwHr7eGlwoilHxtgg= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= -github.com/influxdata/influxdb v1.11.6 h1:zS5MRY+RQ5/XFTer5R8xQRnY17JYSbacvO6OaP164wU= -github.com/influxdata/influxdb v1.11.6/go.mod h1:F10NoQb9qa04lME3pTPWQrYt4JZ/ke1Eei+1ttgHHrg= +github.com/influxdata/influxdb v1.11.7 h1:C31A+S9YfjTCOuAv9Qs0ZdQufslOZZBtejjxiV8QNQw= +github.com/influxdata/influxdb v1.11.7/go.mod h1:zRTAuk/Ie/V1LGxJUv8jfDmfv+ypz22lxfhc1MxC3rI= github.com/ionos-cloud/sdk-go/v6 v6.1.11 h1:J/uRN4UWO3wCyGOeDdMKv8LWRzKu6UIkLEaes38Kzh8= github.com/ionos-cloud/sdk-go/v6 v6.1.11/go.mod h1:EzEgRIDxBELvfoa/uBN0kOQaqovLjUWEB7iW4/Q+t4k= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= @@ -253,8 +253,8 @@ github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXP github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= -github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -264,8 +264,8 @@ github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y8 github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= -github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= +github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= +github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= diff --git a/go.mod b/go.mod index e019aabf6..5f138895d 100644 --- a/go.mod +++ b/go.mod @@ -5,8 +5,8 @@ go 1.22.0 toolchain go1.23.0 require ( - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.14.0 - github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.7.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4 v4.3.0 github.com/Code-Hex/go-generics-cache v1.5.1 @@ -17,10 +17,10 @@ require ( github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 github.com/cespare/xxhash/v2 v2.3.0 github.com/dennwc/varint v1.0.0 - github.com/digitalocean/godo v1.126.0 + github.com/digitalocean/godo v1.128.0 github.com/docker/docker v27.3.1+incompatible - github.com/edsrzf/mmap-go v1.1.0 - github.com/envoyproxy/go-control-plane v0.13.0 + github.com/edsrzf/mmap-go v1.2.0 + github.com/envoyproxy/go-control-plane v0.13.1 github.com/envoyproxy/protoc-gen-validate v1.1.0 github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/fsnotify/fsnotify v1.7.0 @@ -34,14 +34,14 @@ require ( github.com/gophercloud/gophercloud v1.14.1 github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc github.com/grpc-ecosystem/grpc-gateway v1.16.0 - github.com/hashicorp/consul/api v1.29.4 + github.com/hashicorp/consul/api v1.30.0 github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3 - github.com/hetznercloud/hcloud-go/v2 v2.13.1 + github.com/hetznercloud/hcloud-go/v2 v2.15.0 github.com/ionos-cloud/sdk-go/v6 v6.2.1 github.com/json-iterator/go v1.1.12 - github.com/klauspost/compress v1.17.10 + github.com/klauspost/compress v1.17.11 github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b - github.com/linode/linodego v1.41.0 + github.com/linode/linodego v1.42.0 github.com/miekg/dns v1.1.62 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f @@ -50,25 +50,26 @@ require ( github.com/oklog/ulid v1.3.1 github.com/ovh/go-ovh v1.6.0 github.com/prometheus/alertmanager v0.27.0 - github.com/prometheus/client_golang v1.20.4 + github.com/prometheus/client_golang v1.20.5 github.com/prometheus/client_model v0.6.1 - github.com/prometheus/common v0.60.0 + github.com/prometheus/common v0.60.1 github.com/prometheus/common/assets v0.2.0 github.com/prometheus/common/sigv4 v0.1.0 - github.com/prometheus/exporter-toolkit v0.13.0 + github.com/prometheus/exporter-toolkit v0.13.1 github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30 github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c github.com/stretchr/testify v1.9.0 github.com/vultr/govultr/v2 v2.17.2 - go.opentelemetry.io/collector/pdata v1.16.0 - go.opentelemetry.io/collector/semconv v0.110.0 - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 - go.opentelemetry.io/otel v1.30.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.30.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0 - go.opentelemetry.io/otel/sdk v1.30.0 - go.opentelemetry.io/otel/trace v1.30.0 + go.opentelemetry.io/collector/pdata v1.18.0 + go.opentelemetry.io/collector/semconv v0.112.0 + go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 + go.opentelemetry.io/otel v1.31.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.31.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 + go.opentelemetry.io/otel/sdk v1.31.0 + go.opentelemetry.io/otel/trace v1.31.0 go.uber.org/atomic v1.11.0 go.uber.org/automaxprocs v1.6.0 go.uber.org/goleak v1.3.0 @@ -78,10 +79,10 @@ require ( golang.org/x/sys v0.26.0 golang.org/x/text v0.19.0 golang.org/x/tools v0.26.0 - google.golang.org/api v0.199.0 - google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 + google.golang.org/api v0.204.0 + google.golang.org/genproto/googleapis/api v0.0.0-20241015192408-796eee8c2d53 google.golang.org/grpc v1.67.1 - google.golang.org/protobuf v1.34.2 + google.golang.org/protobuf v1.35.1 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.31.1 @@ -92,8 +93,8 @@ require ( ) require ( - cloud.google.com/go/auth v0.9.5 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.4 // indirect + cloud.google.com/go/auth v0.10.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.5 // indirect cloud.google.com/go/compute/metadata v0.5.2 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect @@ -184,15 +185,15 @@ require ( github.com/xhit/go-str2duration/v2 v2.1.0 // indirect go.mongodb.org/mongo-driver v1.14.0 // indirect go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/otel/metric v1.30.0 // indirect + go.opentelemetry.io/otel/metric v1.31.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect golang.org/x/crypto v0.28.0 // indirect golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect golang.org/x/mod v0.21.0 // indirect golang.org/x/net v0.30.0 // indirect golang.org/x/term v0.25.0 // indirect - golang.org/x/time v0.6.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect + golang.org/x/time v0.7.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect diff --git a/go.sum b/go.sum index b601aab9b..fa4367d15 100644 --- a/go.sum +++ b/go.sum @@ -12,10 +12,10 @@ cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bP cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= -cloud.google.com/go/auth v0.9.5 h1:4CTn43Eynw40aFVr3GpPqsQponx2jv0BQpjvajsbbzw= -cloud.google.com/go/auth v0.9.5/go.mod h1:Xo0n7n66eHyOWWCnitop6870Ilwo3PiZyodVkkH1xWM= -cloud.google.com/go/auth/oauth2adapt v0.2.4 h1:0GWE/FUsXhf6C+jAkWgYm7X9tK8cuEIfy19DBn6B6bY= -cloud.google.com/go/auth/oauth2adapt v0.2.4/go.mod h1:jC/jOpwFP6JBxhB3P5Rr0a9HLMC/Pe3eaL4NmdvqPtc= +cloud.google.com/go/auth v0.10.0 h1:tWlkvFAh+wwTOzXIjrwM64karR1iTBZ/GRr0S/DULYo= +cloud.google.com/go/auth v0.10.0/go.mod h1:xxA5AqpDrvS+Gkmo9RqrGGRh6WSNKKOXhY3zNOr38tI= +cloud.google.com/go/auth/oauth2adapt v0.2.5 h1:2p29+dePqsCHPP1bqDJcKj4qxRyYCcbzKpFyKGt3MTk= +cloud.google.com/go/auth/oauth2adapt v0.2.5/go.mod h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= @@ -36,10 +36,12 @@ cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohl cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.14.0 h1:nyQWyZvwGTvunIMxi1Y9uXkcyr+I7TeNrr/foo4Kpk8= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.14.0/go.mod h1:l38EPgmsp71HHLq9j7De57JcKOWPyhrsW1Awm1JS6K0= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 h1:tfLQ34V6F7tVSwoTf/4lH5sE0o6eCJuNDTmH09nDpbc= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0/go.mod h1:9kIvujWAA58nmPmWB1m23fyWic1kYZMxD9CxaWn4Qpg= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 h1:JZg6HRh6W6U4OLl6lk7BZ7BLisIzM9dG1R50zUk9C/M= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0/go.mod h1:YL1xnZ6QejvQHWJrX/AvhFl4WW4rqHVoKspWNVwFk0M= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.0 h1:B/dfvscEQtew9dVuoxqxrUKKv8Ih2f55PydknDamU+g= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.0/go.mod h1:fiPSssYvltE08HJchL04dOy+RD4hgrjph0cwGGMntdI= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.0 h1:+m0M/LFxN43KvULkDNfdXOgrjtg6UYJPFBJyuEcRCAw= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.0/go.mod h1:PwOyop78lveYMRs6oCxjiVyBdyCgIYH6XHIVZO9/SFQ= github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 h1:ywEEhmNahHBihViHepv3xPBn1663uRv2t2q/ESv9seY= github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0/go.mod h1:iZDifYGJTIgIIkYRNWPENUnqx6bJ2xnSDFI2tjwZNuY= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.7.0 h1:LkHbJbgF3YyvC53aqYGR+wWQDn2Rdp9AQdGndf9QvY4= @@ -52,6 +54,8 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1. github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.1.1/go.mod h1:c/wcGeGx5FUPbM/JltUYHZcKmigwyVLJlDq+4HdtXaw= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJTmL004Abzc5wDB5VtZG2PJk5ndYDgVacGqfirKxjM= +github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE= github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 h1:XHOnouVk1mxXfQidrMEnLlPk9UMeRtyBTnEFtxkV0kU= github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= @@ -121,8 +125,10 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE= github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA= -github.com/digitalocean/godo v1.126.0 h1:+Znh7VMQj/E8ArbjWnc7OKGjWfzC+I8OCSRp7r1MdD8= -github.com/digitalocean/godo v1.126.0/go.mod h1:PU8JB6I1XYkQIdHFop8lLAY9ojp6M0XcU0TWaQSxbrc= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/digitalocean/godo v1.128.0 h1:cGn/ibMSRZ9+8etbzMv2MnnCEPTTGlEnx3HHTPwdk1U= +github.com/digitalocean/godo v1.128.0/go.mod h1:PU8JB6I1XYkQIdHFop8lLAY9ojp6M0XcU0TWaQSxbrc= github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0= github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= @@ -133,15 +139,15 @@ github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKoh github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/edsrzf/mmap-go v1.1.0 h1:6EUwBLQ/Mcr1EYLE4Tn1VdW1A4ckqCQWZBw8Hr0kjpQ= -github.com/edsrzf/mmap-go v1.1.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q= +github.com/edsrzf/mmap-go v1.2.0 h1:hXLYlkbaPzt1SaQk+anYwKSRNhufIDCchSPkUD6dD84= +github.com/edsrzf/mmap-go v1.2.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/go-control-plane v0.13.0 h1:HzkeUz1Knt+3bK+8LG1bxOO/jzWZmdxpwC51i202les= -github.com/envoyproxy/go-control-plane v0.13.0/go.mod h1:GRaKG3dwvFoTg4nj7aXdZnvMg4d7nvT/wl9WgVXn3Q8= +github.com/envoyproxy/go-control-plane v0.13.1 h1:vPfJZCkob6yTMEgS+0TwfTUfbHjfy/6vOJ8hUWX/uXE= +github.com/envoyproxy/go-control-plane v0.13.1/go.mod h1:X45hY0mufo6Fd0KW3rqsGvQMw58jvjymeCzBU3mWyHw= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM= github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4= @@ -300,10 +306,8 @@ github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4 github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys= github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I= -github.com/hashicorp/consul/api v1.29.4 h1:P6slzxDLBOxUSj3fWo2o65VuKtbtOXFi7TSSgtXutuE= -github.com/hashicorp/consul/api v1.29.4/go.mod h1:HUlfw+l2Zy68ceJavv2zAyArl2fqhGWnMycyt56sBgg= -github.com/hashicorp/consul/proto-public v0.6.2 h1:+DA/3g/IiKlJZb88NBn0ZgXrxJp2NlvCZdEyl+qxvL0= -github.com/hashicorp/consul/proto-public v0.6.2/go.mod h1:cXXbOg74KBNGajC+o8RlA502Esf0R9prcoJgiOX/2Tg= +github.com/hashicorp/consul/api v1.30.0 h1:ArHVMMILb1nQv8vZSGIwwQd2gtc+oSQZ6CalyiyH2XQ= +github.com/hashicorp/consul/api v1.30.0/go.mod h1:B2uGchvaXVW2JhFoS8nqTxMD5PBykr4ebY4JWHTTeLM= github.com/hashicorp/consul/sdk v0.16.1 h1:V8TxTnImoPD5cj0U9Spl0TUxcytjcbbJeADFF07KdHg= github.com/hashicorp/consul/sdk v0.16.1/go.mod h1:fSXvwxB2hmh1FMZCNl6PwX0Q/1wdWtHJcZ7Ea5tns0s= github.com/hashicorp/cronexpr v1.1.2 h1:wG/ZYIKT+RT3QkOdgYc+xsKWVRgnxJ1OJtjjy84fJ9A= @@ -353,8 +357,8 @@ github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3 h1:fgVfQ4AC1av github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3/go.mod h1:svtxn6QnrQ69P23VvIWMR34tg3vmwLz4UdUzm1dSCgE= github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY= github.com/hashicorp/serf v0.10.1/go.mod h1:yL2t6BqATOLGc5HF7qbFkTfXoPIY0WZdWHfEvMqbG+4= -github.com/hetznercloud/hcloud-go/v2 v2.13.1 h1:jq0GP4QaYE5d8xR/Zw17s9qoaESRJMXfGmtD1a/qckQ= -github.com/hetznercloud/hcloud-go/v2 v2.13.1/go.mod h1:dhix40Br3fDiBhwaSG/zgaYOFFddpfBm/6R1Zz0IiF0= +github.com/hetznercloud/hcloud-go/v2 v2.15.0 h1:6mpMJ/RuX1woZj+MCJdyKNEX9129KDkEIDeeyfr4GD4= +github.com/hetznercloud/hcloud-go/v2 v2.15.0/go.mod h1:h8sHav+27Xa+48cVMAvAUMELov5h298Ilg2vflyTHgg= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= @@ -381,10 +385,12 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= +github.com/keybase/go-keychain v0.0.0-20231219164618-57a3676c3af6 h1:IsMZxCuZqKuao2vNdfD82fjjgPLfyHLpR41Z88viRWs= +github.com/keybase/go-keychain v0.0.0-20231219164618-57a3676c3af6/go.mod h1:3VeWNIJaW+O5xpRQbPp0Ybqu1vJd/pm7s2F473HRrkw= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0= -github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b h1:udzkj9S/zlT5X367kqJis0QP7YMxobob6zhzq6Yre00= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b/go.mod h1:pcaDhQK0/NJZEvtCO0qQPPropqV0sJOJ6YW7X+9kRwM= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -399,8 +405,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/linode/linodego v1.41.0 h1:GcP7JIBr9iLRJ9FwAtb9/WCT1DuPJS/xUApapfdjtiY= -github.com/linode/linodego v1.41.0/go.mod h1:Ow4/XZ0yvWBzt3iAHwchvhSx30AyLintsSMvvQ2/SJY= +github.com/linode/linodego v1.42.0 h1:ZSbi4MtvwrfB9Y6bknesorvvueBGGilcmh2D5dq76RM= +github.com/linode/linodego v1.42.0/go.mod h1:2yzmY6pegPBDgx2HDllmt0eIk2IlzqcgK6NR0wFCFRY= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= @@ -500,8 +506,8 @@ github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5Fsn github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= -github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -513,14 +519,14 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= -github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= +github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= +github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM= github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI= -github.com/prometheus/exporter-toolkit v0.13.0 h1:lmA0Q+8IaXgmFRKw09RldZmZdnvu9wwcDLIXGmTPw1c= -github.com/prometheus/exporter-toolkit v0.13.0/go.mod h1:2uop99EZl80KdXhv/MxVI2181fMcwlsumFOqBecGkG0= +github.com/prometheus/exporter-toolkit v0.13.1 h1:Evsh0gWQo2bdOHlnz9+0Nm7/OFfIwhE2Ws4A2jIlR04= +github.com/prometheus/exporter-toolkit v0.13.1/go.mod h1:ujdv2YIOxtdFxxqtloLpbqmxd5J0Le6IITUvIRSWjj0= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= @@ -528,10 +534,12 @@ github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4O github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/redis/go-redis/v9 v9.6.1 h1:HHDteefn6ZkTtY5fGUE8tj8uy85AHk6zP7CpzIAM0y4= +github.com/redis/go-redis/v9 v9.6.1/go.mod h1:0C0c6ycQsdpVNQpxb1njEQIqkx5UcsM8FJCQLgE9+RA= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30 h1:yoKAVkEVwAqbGbR8n87rHQ1dulL25rKloGadb3vm770= github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30/go.mod h1:sH0u6fq6x4R5M7WxkoQFY/o7UaiItec0o1LinLCJNq8= @@ -587,26 +595,28 @@ go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/collector/pdata v1.16.0 h1:g02K8jlRnmQ7TQDuXpdgVL6vIxIVqr5Gbb1qIR27rto= -go.opentelemetry.io/collector/pdata v1.16.0/go.mod h1:YZZJIt2ehxosYf/Y1pbvexjNWsIGNNrzzlCTO9jC1F4= -go.opentelemetry.io/collector/semconv v0.110.0 h1:KHQnOHe3gUz0zsxe8ph9kN5OTypCFD4V+06AiBTfeNk= -go.opentelemetry.io/collector/semconv v0.110.0/go.mod h1:zCJ5njhWpejR+A40kiEoeFm1xq1uzyZwMnRNX6/D82A= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 h1:ZIg3ZT/aQ7AfKqdwp7ECpOK6vHqquXXuyTjIO8ZdmPs= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0/go.mod h1:DQAwmETtZV00skUwgD6+0U89g80NKsJE3DCKeLLPQMI= -go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts= -go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0 h1:lsInsfvhVIfOI6qHVyysXMNDnjO9Npvl7tlDPJFBVd4= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0/go.mod h1:KQsVNh4OjgjTG0G6EiNi1jVpnaeeKsKMRwbLN+f1+8M= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.30.0 h1:m0yTiGDLUvVYaTFbAvCkVYIYcvwKt3G7OLoN77NUs/8= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.30.0/go.mod h1:wBQbT4UekBfegL2nx0Xk1vBcnzyBPsIVm9hRG4fYcr4= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0 h1:umZgi92IyxfXd/l4kaDhnKgY8rnN/cZcF1LKc6I8OQ8= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0/go.mod h1:4lVs6obhSVRb1EW5FhOuBTyiQhtRtAnnva9vD3yRfq8= -go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w= -go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ= -go.opentelemetry.io/otel/sdk v1.30.0 h1:cHdik6irO49R5IysVhdn8oaiR9m8XluDaJAs4DfOrYE= -go.opentelemetry.io/otel/sdk v1.30.0/go.mod h1:p14X4Ok8S+sygzblytT1nqG98QG2KYKv++HE0LY/mhg= -go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc= -go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o= +go.opentelemetry.io/collector/pdata v1.18.0 h1:/yg2rO2dxqDM2p6GutsMCxXN6sKlXwyIz/ZYyUPONBg= +go.opentelemetry.io/collector/pdata v1.18.0/go.mod h1:Ox1YVLe87cZDB/TL30i4SUz1cA5s6AM6SpFMfY61ICs= +go.opentelemetry.io/collector/semconv v0.112.0 h1:JPQyvZhlNLVSuVI+FScONaiFygB7h7NTZceUEKIQUEc= +go.opentelemetry.io/collector/semconv v0.112.0/go.mod h1:zCJ5njhWpejR+A40kiEoeFm1xq1uzyZwMnRNX6/D82A= +go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 h1:4BZHA+B1wXEQoGNHxW8mURaLhcdGwvRnmhGbm+odRbc= +go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0/go.mod h1:3qi2EEwMgB4xnKgPLqsDP3j9qxnHDZeHsnAxfjQqTko= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuHFkUgOQL9FFQFrZ+5LiwhhYRbi7VZSIx6Nj5s= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM= +go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= +go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 h1:K0XaT3DwHAcV4nKLzcQvwAgSyisUghWoY20I7huthMk= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0/go.mod h1:B5Ki776z/MBnVha1Nzwp5arlzBbE3+1jk+pGmaP5HME= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.31.0 h1:FFeLy03iVTXP6ffeN2iXrxfGsZGCjVx0/4KlizjyBwU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.31.0/go.mod h1:TMu73/k1CP8nBUpDLc71Wj/Kf7ZS9FK5b53VapRsP9o= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 h1:lUsI2TYsQw2r1IASwoROaCnjdj2cvC2+Jbxvk6nHnWU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0/go.mod h1:2HpZxxQurfGxJlJDblybejHB6RX6pmExPNe517hREw4= +go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= +go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= +go.opentelemetry.io/otel/sdk v1.31.0 h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk= +go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0= +go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= +go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= @@ -816,8 +826,8 @@ golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= -golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= -golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= +golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -882,8 +892,8 @@ google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/ google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.199.0 h1:aWUXClp+VFJmqE0JPvpZOK3LDQMyFKYIow4etYd9qxs= -google.golang.org/api v0.199.0/go.mod h1:ohG4qSztDJmZdjK/Ar6MhbAmb/Rpi4JHOqagsh90K28= +google.golang.org/api v0.204.0 h1:3PjmQQEDkR/ENVZZwIYB4W/KzYtN8OrqnNcHWpeR8E4= +google.golang.org/api v0.204.0/go.mod h1:69y8QSoKIbL9F94bWgWAq6wGqGwyjBgi2y8rAK8zLag= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -918,10 +928,10 @@ google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1m google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 h1:hjSy6tcFQZ171igDaN5QHOw2n6vx40juYbC/x67CEhc= -google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:qpvKtACPCQhAdu3PyQgV4l3LMXZEtft7y8QcarRsp9I= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/genproto/googleapis/api v0.0.0-20241015192408-796eee8c2d53 h1:fVoAXEKA4+yufmbdVYv+SE73+cPZbbbe8paLsHfkK+U= +google.golang.org/genproto/googleapis/api v0.0.0-20241015192408-796eee8c2d53/go.mod h1:riSXTwQ4+nqmPGtobMFyW5FqVAmIs0St6VPp4Ug7CE4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 h1:zciRKQ4kBpFgpfC5QQCVtnnNAcLIqweL7plyZRQHVpI= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -948,8 +958,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index a6ad47acd..e5519a56d 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -14,6 +14,7 @@ package histogram import ( + "errors" "fmt" "math" "strings" @@ -784,16 +785,16 @@ func (h *FloatHistogram) Validate() error { return fmt.Errorf("custom buckets: %w", err) } if h.ZeroCount != 0 { - return fmt.Errorf("custom buckets: must have zero count of 0") + return errors.New("custom buckets: must have zero count of 0") } if h.ZeroThreshold != 0 { - return fmt.Errorf("custom buckets: must have zero threshold of 0") + return errors.New("custom buckets: must have zero threshold of 0") } if len(h.NegativeSpans) > 0 { - return fmt.Errorf("custom buckets: must not have negative spans") + return errors.New("custom buckets: must not have negative spans") } if len(h.NegativeBuckets) > 0 { - return fmt.Errorf("custom buckets: must not have negative buckets") + return errors.New("custom buckets: must not have negative buckets") } } else { if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { @@ -807,7 +808,7 @@ func (h *FloatHistogram) Validate() error { return fmt.Errorf("negative side: %w", err) } if h.CustomValues != nil { - return fmt.Errorf("histogram with exponential schema must not have custom bounds") + return errors.New("histogram with exponential schema must not have custom bounds") } } err := checkHistogramBuckets(h.PositiveBuckets, &pCount, false) @@ -948,10 +949,10 @@ func (h *FloatHistogram) floatBucketIterator( positive bool, absoluteStartValue float64, targetSchema int32, ) floatBucketIterator { if h.UsesCustomBuckets() && targetSchema != h.Schema { - panic(fmt.Errorf("cannot merge from custom buckets schema to exponential schema")) + panic(errors.New("cannot merge from custom buckets schema to exponential schema")) } if !h.UsesCustomBuckets() && IsCustomBucketsSchema(targetSchema) { - panic(fmt.Errorf("cannot merge from exponential buckets schema to custom schema")) + panic(errors.New("cannot merge from exponential buckets schema to custom schema")) } if targetSchema > h.Schema { panic(fmt.Errorf("cannot merge from schema %d to %d", h.Schema, targetSchema)) diff --git a/model/histogram/histogram.go b/model/histogram/histogram.go index e4b99ec42..778aefe28 100644 --- a/model/histogram/histogram.go +++ b/model/histogram/histogram.go @@ -14,6 +14,7 @@ package histogram import ( + "errors" "fmt" "math" "slices" @@ -432,16 +433,16 @@ func (h *Histogram) Validate() error { return fmt.Errorf("custom buckets: %w", err) } if h.ZeroCount != 0 { - return fmt.Errorf("custom buckets: must have zero count of 0") + return errors.New("custom buckets: must have zero count of 0") } if h.ZeroThreshold != 0 { - return fmt.Errorf("custom buckets: must have zero threshold of 0") + return errors.New("custom buckets: must have zero threshold of 0") } if len(h.NegativeSpans) > 0 { - return fmt.Errorf("custom buckets: must not have negative spans") + return errors.New("custom buckets: must not have negative spans") } if len(h.NegativeBuckets) > 0 { - return fmt.Errorf("custom buckets: must not have negative buckets") + return errors.New("custom buckets: must not have negative buckets") } } else { if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { @@ -455,7 +456,7 @@ func (h *Histogram) Validate() error { return fmt.Errorf("negative side: %w", err) } if h.CustomValues != nil { - return fmt.Errorf("histogram with exponential schema must not have custom bounds") + return errors.New("histogram with exponential schema must not have custom bounds") } } err := checkHistogramBuckets(h.PositiveBuckets, &pCount, true) diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index eb79f7be2..93331cf99 100644 --- a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -16,6 +16,7 @@ package relabel import ( "crypto/md5" "encoding/binary" + "errors" "fmt" "strconv" "strings" @@ -114,10 +115,10 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error { func (c *Config) Validate() error { if c.Action == "" { - return fmt.Errorf("relabel action cannot be empty") + return errors.New("relabel action cannot be empty") } if c.Modulus == 0 && c.Action == HashMod { - return fmt.Errorf("relabel configuration for hashmod requires non-zero modulus") + return errors.New("relabel configuration for hashmod requires non-zero modulus") } if (c.Action == Replace || c.Action == HashMod || c.Action == Lowercase || c.Action == Uppercase || c.Action == KeepEqual || c.Action == DropEqual) && c.TargetLabel == "" { return fmt.Errorf("relabel configuration for %s action requires 'target_label' value", c.Action) diff --git a/model/rulefmt/rulefmt.go b/model/rulefmt/rulefmt.go index bfb85ce74..bb36a2120 100644 --- a/model/rulefmt/rulefmt.go +++ b/model/rulefmt/rulefmt.go @@ -111,6 +111,20 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { ) } + for k, v := range g.Labels { + if !model.LabelName(k).IsValid() || k == model.MetricNameLabel { + errs = append( + errs, fmt.Errorf("invalid label name: %s", k), + ) + } + + if !model.LabelValue(v).IsValid() { + errs = append( + errs, fmt.Errorf("invalid label value: %s", v), + ) + } + } + set[g.Name] = struct{}{} for i, r := range g.Rules { @@ -136,11 +150,12 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { // RuleGroup is a list of sequentially evaluated recording and alerting rules. type RuleGroup struct { - Name string `yaml:"name"` - Interval model.Duration `yaml:"interval,omitempty"` - QueryOffset *model.Duration `yaml:"query_offset,omitempty"` - Limit int `yaml:"limit,omitempty"` - Rules []RuleNode `yaml:"rules"` + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + QueryOffset *model.Duration `yaml:"query_offset,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []RuleNode `yaml:"rules"` + Labels map[string]string `yaml:"labels,omitempty"` } // Rule describes an alerting or recording rule. @@ -169,14 +184,14 @@ type RuleNode struct { func (r *RuleNode) Validate() (nodes []WrappedError) { if r.Record.Value != "" && r.Alert.Value != "" { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("only one of 'record' and 'alert' must be set"), + err: errors.New("only one of 'record' and 'alert' must be set"), node: &r.Record, nodeAlt: &r.Alert, }) } if r.Record.Value == "" && r.Alert.Value == "" { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("one of 'record' or 'alert' must be set"), + err: errors.New("one of 'record' or 'alert' must be set"), node: &r.Record, nodeAlt: &r.Alert, }) @@ -184,7 +199,7 @@ func (r *RuleNode) Validate() (nodes []WrappedError) { if r.Expr.Value == "" { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("field 'expr' must be set in rule"), + err: errors.New("field 'expr' must be set in rule"), node: &r.Expr, }) } else if _, err := parser.ParseExpr(r.Expr.Value); err != nil { @@ -196,19 +211,19 @@ func (r *RuleNode) Validate() (nodes []WrappedError) { if r.Record.Value != "" { if len(r.Annotations) > 0 { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("invalid field 'annotations' in recording rule"), + err: errors.New("invalid field 'annotations' in recording rule"), node: &r.Record, }) } if r.For != 0 { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("invalid field 'for' in recording rule"), + err: errors.New("invalid field 'for' in recording rule"), node: &r.Record, }) } if r.KeepFiringFor != 0 { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("invalid field 'keep_firing_for' in recording rule"), + err: errors.New("invalid field 'keep_firing_for' in recording rule"), node: &r.Record, }) } diff --git a/model/rulefmt/rulefmt_test.go b/model/rulefmt/rulefmt_test.go index 669f1da4e..73ea17459 100644 --- a/model/rulefmt/rulefmt_test.go +++ b/model/rulefmt/rulefmt_test.go @@ -107,6 +107,23 @@ groups: severity: "page" annotations: summary: "Instance {{ $labels.instance }} down" +`, + shouldPass: true, + }, + { + ruleString: ` +groups: +- name: example + labels: + team: myteam + rules: + - alert: InstanceDown + expr: up == 0 + for: 5m + labels: + severity: "page" + annotations: + summary: "Instance {{ $labels.instance }} down" `, shouldPass: true, }, diff --git a/model/textparse/benchmark_test.go b/model/textparse/benchmark_test.go index 3b8b8f305..bd0d5089a 100644 --- a/model/textparse/benchmark_test.go +++ b/model/textparse/benchmark_test.go @@ -40,6 +40,10 @@ var newTestParserFns = map[string]newParser{ "omtext": func(b []byte, st *labels.SymbolTable) Parser { return NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) }, + "omtext_with_nhcb": func(b []byte, st *labels.SymbolTable) Parser { + p := NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) + return NewNHCBParser(p, st, false) + }, } // BenchmarkParse benchmarks parsing, mimicking how scrape/scrape.go#append use it. @@ -78,6 +82,10 @@ func BenchmarkParse(b *testing.B) { // We don't pass compareToExpfmtFormat: expfmt.TypeOpenMetrics as expfmt does not support OM exemplars, see https://github.com/prometheus/common/issues/703. {dataFile: "omtestdata.txt", parser: "omtext"}, {dataFile: "promtestdata.txt", parser: "omtext"}, // Compare how omtext parser deals with Prometheus text format vs promtext. + + // NHCB. + {dataFile: "omhistogramdata.txt", parser: "omtext"}, // Measure OM parser baseline for histograms. + {dataFile: "omhistogramdata.txt", parser: "omtext_with_nhcb"}, // Measure NHCB over OM parser. } { var buf []byte dataCase := bcase.dataFile diff --git a/model/textparse/interface.go b/model/textparse/interface.go index 3b0e9a96e..268285528 100644 --- a/model/textparse/interface.go +++ b/model/textparse/interface.go @@ -14,6 +14,8 @@ package textparse import ( + "errors" + "fmt" "mime" "github.com/prometheus/common/model" @@ -23,8 +25,7 @@ import ( "github.com/prometheus/prometheus/model/labels" ) -// Parser parses samples from a byte slice of samples in the official -// Prometheus and OpenMetrics text exposition formats. +// Parser parses samples from a byte slice of samples in different exposition formats. type Parser interface { // Series returns the bytes of a series with a simple float64 as a // value, the timestamp if set, and the value of the current sample. @@ -58,6 +59,8 @@ type Parser interface { // Metric writes the labels of the current sample into the passed labels. // It returns the string from which the metric was parsed. + // The values of the "le" labels of classic histograms and "quantile" labels + // of summaries should follow the OpenMetrics formatting rules. Metric(l *labels.Labels) string // Exemplar writes the exemplar of the current sample into the passed @@ -78,28 +81,65 @@ type Parser interface { Next() (Entry, error) } -// New returns a new parser of the byte slice. -// -// This function always returns a valid parser, but might additionally -// return an error if the content type cannot be parsed. -func New(b []byte, contentType string, parseClassicHistograms, skipOMCTSeries bool, st *labels.SymbolTable) (Parser, error) { +// extractMediaType returns the mediaType of a required parser. It tries first to +// extract a valid and supported mediaType from contentType. If that fails, +// the provided fallbackType (possibly an empty string) is returned, together with +// an error. fallbackType is used as-is without further validation. +func extractMediaType(contentType, fallbackType string) (string, error) { if contentType == "" { - return NewPromParser(b, st), nil + if fallbackType == "" { + return "", errors.New("non-compliant scrape target sending blank Content-Type and no fallback_scrape_protocol specified for target") + } + return fallbackType, fmt.Errorf("non-compliant scrape target sending blank Content-Type, using fallback_scrape_protocol %q", fallbackType) } + // We have a contentType, parse it. mediaType, _, err := mime.ParseMediaType(contentType) if err != nil { - return NewPromParser(b, st), err + if fallbackType == "" { + retErr := fmt.Errorf("cannot parse Content-Type %q and no fallback_scrape_protocol for target", contentType) + return "", errors.Join(retErr, err) + } + retErr := fmt.Errorf("could not parse received Content-Type %q, using fallback_scrape_protocol %q", contentType, fallbackType) + return fallbackType, errors.Join(retErr, err) } + + // We have a valid media type, either we recognise it and can use it + // or we have to error. + switch mediaType { + case "application/openmetrics-text", "application/vnd.google.protobuf", "text/plain": + return mediaType, nil + } + // We're here because we have no recognised mediaType. + if fallbackType == "" { + return "", fmt.Errorf("received unsupported Content-Type %q and no fallback_scrape_protocol specified for target", contentType) + } + return fallbackType, fmt.Errorf("received unsupported Content-Type %q, using fallback_scrape_protocol %q", contentType, fallbackType) +} + +// New returns a new parser of the byte slice. +// +// This function no longer guarantees to return a valid parser. +// +// It only returns a valid parser if the supplied contentType and fallbackType allow. +// An error may also be returned if fallbackType had to be used or there was some +// other error parsing the supplied Content-Type. +// If the returned parser is nil then the scrape must fail. +func New(b []byte, contentType, fallbackType string, parseClassicHistograms, skipOMCTSeries bool, st *labels.SymbolTable) (Parser, error) { + mediaType, err := extractMediaType(contentType, fallbackType) + // err may be nil or something we want to warn about. + switch mediaType { case "application/openmetrics-text": return NewOpenMetricsParser(b, st, func(o *openMetricsParserOptions) { o.SkipCTSeries = skipOMCTSeries - }), nil + }), err case "application/vnd.google.protobuf": - return NewProtobufParser(b, parseClassicHistograms, st), nil + return NewProtobufParser(b, parseClassicHistograms, st), err + case "text/plain": + return NewPromParser(b, st), err default: - return NewPromParser(b, st), nil + return nil, err } } diff --git a/model/textparse/interface_test.go b/model/textparse/interface_test.go index 3f2f758d7..72c8284f2 100644 --- a/model/textparse/interface_test.go +++ b/model/textparse/interface_test.go @@ -22,6 +22,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -31,6 +32,10 @@ import ( func TestNewParser(t *testing.T) { t.Parallel() + requireNilParser := func(t *testing.T, p Parser) { + require.Nil(t, p) + } + requirePromParser := func(t *testing.T, p Parser) { require.NotNil(t, p) _, ok := p.(*PromParser) @@ -43,34 +48,83 @@ func TestNewParser(t *testing.T) { require.True(t, ok) } + requireProtobufParser := func(t *testing.T, p Parser) { + require.NotNil(t, p) + _, ok := p.(*ProtobufParser) + require.True(t, ok) + } + for name, tt := range map[string]*struct { - contentType string - validateParser func(*testing.T, Parser) - err string + contentType string + fallbackScrapeProtocol config.ScrapeProtocol + validateParser func(*testing.T, Parser) + err string }{ "empty-string": { - validateParser: requirePromParser, + validateParser: requireNilParser, + err: "non-compliant scrape target sending blank Content-Type and no fallback_scrape_protocol specified for target", + }, + "empty-string-fallback-text-plain": { + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText0_0_4, + err: "non-compliant scrape target sending blank Content-Type, using fallback_scrape_protocol \"text/plain\"", }, "invalid-content-type-1": { contentType: "invalid/", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "expected token after slash", }, + "invalid-content-type-1-fallback-text-plain": { + contentType: "invalid/", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText0_0_4, + err: "expected token after slash", + }, + "invalid-content-type-1-fallback-openmetrics": { + contentType: "invalid/", + validateParser: requireOpenMetricsParser, + fallbackScrapeProtocol: config.OpenMetricsText0_0_1, + err: "expected token after slash", + }, + "invalid-content-type-1-fallback-protobuf": { + contentType: "invalid/", + validateParser: requireProtobufParser, + fallbackScrapeProtocol: config.PrometheusProto, + err: "expected token after slash", + }, "invalid-content-type-2": { contentType: "invalid/invalid/invalid", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "unexpected content after media subtype", }, + "invalid-content-type-2-fallback-text-plain": { + contentType: "invalid/invalid/invalid", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText1_0_0, + err: "unexpected content after media subtype", + }, "invalid-content-type-3": { contentType: "/", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "no media type", }, + "invalid-content-type-3-fallback-text-plain": { + contentType: "/", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText1_0_0, + err: "no media type", + }, "invalid-content-type-4": { contentType: "application/openmetrics-text; charset=UTF-8; charset=utf-8", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "duplicate parameter name", }, + "invalid-content-type-4-fallback-open-metrics": { + contentType: "application/openmetrics-text; charset=UTF-8; charset=utf-8", + validateParser: requireOpenMetricsParser, + fallbackScrapeProtocol: config.OpenMetricsText1_0_0, + err: "duplicate parameter name", + }, "openmetrics": { contentType: "application/openmetrics-text", validateParser: requireOpenMetricsParser, @@ -87,20 +141,33 @@ func TestNewParser(t *testing.T) { contentType: "text/plain", validateParser: requirePromParser, }, + "protobuf": { + contentType: "application/vnd.google.protobuf", + validateParser: requireProtobufParser, + }, "plain-text-with-version": { contentType: "text/plain; version=0.0.4", validateParser: requirePromParser, }, "some-other-valid-content-type": { contentType: "text/html", - validateParser: requirePromParser, + validateParser: requireNilParser, + err: "received unsupported Content-Type \"text/html\" and no fallback_scrape_protocol specified for target", + }, + "some-other-valid-content-type-fallback-text-plain": { + contentType: "text/html", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText0_0_4, + err: "received unsupported Content-Type \"text/html\", using fallback_scrape_protocol \"text/plain\"", }, } { t.Run(name, func(t *testing.T) { tt := tt // Copy to local variable before going parallel. t.Parallel() - p, err := New([]byte{}, tt.contentType, false, false, labels.NewSymbolTable()) + fallbackProtoMediaType := tt.fallbackScrapeProtocol.HeaderMediaType() + + p, err := New([]byte{}, tt.contentType, fallbackProtoMediaType, false, false, labels.NewSymbolTable()) tt.validateParser(t, p) if tt.err == "" { require.NoError(t, err) @@ -172,13 +239,13 @@ func testParse(t *testing.T, p Parser) (ret []parsedEntry) { } p.Metric(&got.lset) - for e := (exemplar.Exemplar{}); p.Exemplar(&e); { - got.es = append(got.es, e) - } // Parser reuses int pointer. if ct := p.CreatedTimestamp(); ct != nil { got.ct = int64p(*ct) } + for e := (exemplar.Exemplar{}); p.Exemplar(&e); { + got.es = append(got.es, e) + } case EntryType: m, got.typ = p.Type() got.m = string(m) diff --git a/model/textparse/nhcbparse.go b/model/textparse/nhcbparse.go new file mode 100644 index 000000000..6fe2e8e54 --- /dev/null +++ b/model/textparse/nhcbparse.go @@ -0,0 +1,374 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "errors" + "io" + "math" + "strconv" + "strings" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/util/convertnhcb" +) + +type collectionState int + +const ( + stateStart collectionState = iota + stateCollecting + stateEmitting +) + +// The NHCBParser wraps a Parser and converts classic histograms to native +// histograms with custom buckets. +// +// Since Parser interface is line based, this parser needs to keep track +// of the last classic histogram series it saw to collate them into a +// single native histogram. +// +// Note: +// - Only series that have the histogram metadata type are considered for +// conversion. +// - The classic series are also returned if keepClassicHistograms is true. +type NHCBParser struct { + // The parser we're wrapping. + parser Parser + // Option to keep classic histograms along with converted histograms. + keepClassicHistograms bool + + // Labels builder. + builder labels.ScratchBuilder + + // State of the parser. + state collectionState + + // Caches the values from the underlying parser. + // For Series and Histogram. + bytes []byte + ts *int64 + value float64 + h *histogram.Histogram + fh *histogram.FloatHistogram + // For Metric. + lset labels.Labels + metricString string + // For Type. + bName []byte + typ model.MetricType + + // Caches the entry itself if we are inserting a converted NHCB + // halfway through. + entry Entry + err error + + // Caches the values and metric for the inserted converted NHCB. + bytesNHCB []byte + hNHCB *histogram.Histogram + fhNHCB *histogram.FloatHistogram + lsetNHCB labels.Labels + exemplars []exemplar.Exemplar + ctNHCB *int64 + metricStringNHCB string + + // Collates values from the classic histogram series to build + // the converted histogram later. + tempLsetNHCB labels.Labels + tempNHCB convertnhcb.TempHistogram + tempExemplars []exemplar.Exemplar + tempExemplarCount int + tempCT *int64 + + // Remembers the last base histogram metric name (assuming it's + // a classic histogram) so we can tell if the next float series + // is part of the same classic histogram. + lastHistogramName string + lastHistogramLabelsHash uint64 + lastHistogramExponential bool + // Reused buffer for hashing labels. + hBuffer []byte +} + +func NewNHCBParser(p Parser, st *labels.SymbolTable, keepClassicHistograms bool) Parser { + return &NHCBParser{ + parser: p, + keepClassicHistograms: keepClassicHistograms, + builder: labels.NewScratchBuilderWithSymbolTable(st, 16), + tempNHCB: convertnhcb.NewTempHistogram(), + } +} + +func (p *NHCBParser) Series() ([]byte, *int64, float64) { + return p.bytes, p.ts, p.value +} + +func (p *NHCBParser) Histogram() ([]byte, *int64, *histogram.Histogram, *histogram.FloatHistogram) { + if p.state == stateEmitting { + return p.bytesNHCB, p.ts, p.hNHCB, p.fhNHCB + } + return p.bytes, p.ts, p.h, p.fh +} + +func (p *NHCBParser) Help() ([]byte, []byte) { + return p.parser.Help() +} + +func (p *NHCBParser) Type() ([]byte, model.MetricType) { + return p.bName, p.typ +} + +func (p *NHCBParser) Unit() ([]byte, []byte) { + return p.parser.Unit() +} + +func (p *NHCBParser) Comment() []byte { + return p.parser.Comment() +} + +func (p *NHCBParser) Metric(l *labels.Labels) string { + if p.state == stateEmitting { + *l = p.lsetNHCB + return p.metricStringNHCB + } + *l = p.lset + return p.metricString +} + +func (p *NHCBParser) Exemplar(ex *exemplar.Exemplar) bool { + if p.state == stateEmitting { + if len(p.exemplars) == 0 { + return false + } + *ex = p.exemplars[0] + p.exemplars = p.exemplars[1:] + return true + } + return p.parser.Exemplar(ex) +} + +func (p *NHCBParser) CreatedTimestamp() *int64 { + switch p.state { + case stateStart: + if p.entry == EntrySeries || p.entry == EntryHistogram { + return p.parser.CreatedTimestamp() + } + case stateCollecting: + return p.tempCT + case stateEmitting: + return p.ctNHCB + } + return nil +} + +func (p *NHCBParser) Next() (Entry, error) { + if p.state == stateEmitting { + p.state = stateStart + if p.entry == EntrySeries { + isNHCB := p.handleClassicHistogramSeries(p.lset) + if isNHCB && !p.keepClassicHistograms { + // Do not return the classic histogram series if it was converted to NHCB and we are not keeping classic histograms. + return p.Next() + } + } + return p.entry, p.err + } + + p.entry, p.err = p.parser.Next() + if p.err != nil { + if errors.Is(p.err, io.EOF) && p.processNHCB() { + return EntryHistogram, nil + } + return EntryInvalid, p.err + } + switch p.entry { + case EntrySeries: + p.bytes, p.ts, p.value = p.parser.Series() + p.metricString = p.parser.Metric(&p.lset) + // Check the label set to see if we can continue or need to emit the NHCB. + var isNHCB bool + if p.compareLabels() { + // Labels differ. Check if we can emit the NHCB. + if p.processNHCB() { + return EntryHistogram, nil + } + isNHCB = p.handleClassicHistogramSeries(p.lset) + } else { + // Labels are the same. Check if after an exponential histogram. + if p.lastHistogramExponential { + isNHCB = false + } else { + isNHCB = p.handleClassicHistogramSeries(p.lset) + } + } + if isNHCB && !p.keepClassicHistograms { + // Do not return the classic histogram series if it was converted to NHCB and we are not keeping classic histograms. + return p.Next() + } + return p.entry, p.err + case EntryHistogram: + p.bytes, p.ts, p.h, p.fh = p.parser.Histogram() + p.metricString = p.parser.Metric(&p.lset) + p.storeExponentialLabels() + case EntryType: + p.bName, p.typ = p.parser.Type() + } + if p.processNHCB() { + return EntryHistogram, nil + } + return p.entry, p.err +} + +// Return true if labels have changed and we should emit the NHCB. +func (p *NHCBParser) compareLabels() bool { + if p.state != stateCollecting { + return false + } + if p.typ != model.MetricTypeHistogram { + // Different metric type. + return true + } + if p.lastHistogramName != convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName)) { + // Different metric name. + return true + } + nextHash, _ := p.lset.HashWithoutLabels(p.hBuffer, labels.BucketLabel) + // Different label values. + return p.lastHistogramLabelsHash != nextHash +} + +// Save the label set of the classic histogram without suffix and bucket `le` label. +func (p *NHCBParser) storeClassicLabels() { + p.lastHistogramName = convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName)) + p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer, labels.BucketLabel) + p.lastHistogramExponential = false +} + +func (p *NHCBParser) storeExponentialLabels() { + p.lastHistogramName = p.lset.Get(labels.MetricName) + p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer) + p.lastHistogramExponential = true +} + +// handleClassicHistogramSeries collates the classic histogram series to be converted to NHCB +// if it is actually a classic histogram series (and not a normal float series) and if there +// isn't already a native histogram with the same name (assuming it is always processed +// right before the classic histograms) and returns true if the collation was done. +func (p *NHCBParser) handleClassicHistogramSeries(lset labels.Labels) bool { + if p.typ != model.MetricTypeHistogram { + return false + } + mName := lset.Get(labels.MetricName) + // Sanity check to ensure that the TYPE metadata entry name is the same as the base name. + if convertnhcb.GetHistogramMetricBaseName(mName) != string(p.bName) { + return false + } + switch { + case strings.HasSuffix(mName, "_bucket") && lset.Has(labels.BucketLabel): + le, err := strconv.ParseFloat(lset.Get(labels.BucketLabel), 64) + if err == nil && !math.IsNaN(le) { + p.processClassicHistogramSeries(lset, "_bucket", func(hist *convertnhcb.TempHistogram) { + _ = hist.SetBucketCount(le, p.value) + }) + return true + } + case strings.HasSuffix(mName, "_count"): + p.processClassicHistogramSeries(lset, "_count", func(hist *convertnhcb.TempHistogram) { + _ = hist.SetCount(p.value) + }) + return true + case strings.HasSuffix(mName, "_sum"): + p.processClassicHistogramSeries(lset, "_sum", func(hist *convertnhcb.TempHistogram) { + _ = hist.SetSum(p.value) + }) + return true + } + return false +} + +func (p *NHCBParser) processClassicHistogramSeries(lset labels.Labels, suffix string, updateHist func(*convertnhcb.TempHistogram)) { + if p.state != stateCollecting { + p.storeClassicLabels() + p.tempCT = p.parser.CreatedTimestamp() + p.state = stateCollecting + p.tempLsetNHCB = convertnhcb.GetHistogramMetricBase(lset, suffix) + } + p.storeExemplars() + updateHist(&p.tempNHCB) +} + +func (p *NHCBParser) storeExemplars() { + for ex := p.nextExemplarPtr(); p.parser.Exemplar(ex); ex = p.nextExemplarPtr() { + p.tempExemplarCount++ + } +} + +func (p *NHCBParser) nextExemplarPtr() *exemplar.Exemplar { + switch { + case p.tempExemplarCount == len(p.tempExemplars)-1: + // Reuse the previously allocated exemplar, it was not filled up. + case len(p.tempExemplars) == cap(p.tempExemplars): + // Let the runtime grow the slice. + p.tempExemplars = append(p.tempExemplars, exemplar.Exemplar{}) + default: + // Take the next element into use. + p.tempExemplars = p.tempExemplars[:len(p.tempExemplars)+1] + } + return &p.tempExemplars[len(p.tempExemplars)-1] +} + +func (p *NHCBParser) swapExemplars() { + p.exemplars = p.tempExemplars[:p.tempExemplarCount] + p.tempExemplars = p.tempExemplars[:0] +} + +// processNHCB converts the collated classic histogram series to NHCB and caches the info +// to be returned to callers. Retruns true if the conversion was successful. +func (p *NHCBParser) processNHCB() bool { + if p.state != stateCollecting { + return false + } + h, fh, err := p.tempNHCB.Convert() + if err == nil { + if h != nil { + if err := h.Validate(); err != nil { + return false + } + p.hNHCB = h + p.fhNHCB = nil + } else if fh != nil { + if err := fh.Validate(); err != nil { + return false + } + p.hNHCB = nil + p.fhNHCB = fh + } + p.metricStringNHCB = p.tempLsetNHCB.Get(labels.MetricName) + strings.ReplaceAll(p.tempLsetNHCB.DropMetricName().String(), ", ", ",") + p.bytesNHCB = []byte(p.metricStringNHCB) + p.lsetNHCB = p.tempLsetNHCB + p.swapExemplars() + p.ctNHCB = p.tempCT + p.state = stateEmitting + } else { + p.state = stateStart + } + p.tempNHCB.Reset() + p.tempExemplarCount = 0 + p.tempCT = nil + return err == nil +} diff --git a/model/textparse/nhcbparse_test.go b/model/textparse/nhcbparse_test.go new file mode 100644 index 000000000..859bcc1cb --- /dev/null +++ b/model/textparse/nhcbparse_test.go @@ -0,0 +1,984 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "bytes" + "encoding/binary" + "strconv" + "testing" + + "github.com/gogo/protobuf/proto" + "github.com/stretchr/testify/require" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" +) + +func TestNHCBParserOnOMParser(t *testing.T) { + // The input is taken originally from TestOpenMetricsParse, with additional tests for the NHCBParser. + + input := `# HELP go_gc_duration_seconds A summary of the GC invocation durations. +# TYPE go_gc_duration_seconds summary +# UNIT go_gc_duration_seconds seconds +go_gc_duration_seconds{quantile="0"} 4.9351e-05 +go_gc_duration_seconds{quantile="0.25"} 7.424100000000001e-05 +go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05 +# HELP nohelp1 +# HELP help2 escape \ \n \\ \" \x chars +# UNIT nounit +go_gc_duration_seconds{quantile="1.0",a="b"} 8.3835e-05 +go_gc_duration_seconds_count 99 +some:aggregate:rate5m{a_b="c"} 1 +# HELP go_goroutines Number of goroutines that currently exist. +# TYPE go_goroutines gauge +go_goroutines 33 123.123 +# TYPE hh histogram +hh_bucket{le="+Inf"} 1 +# TYPE gh gaugehistogram +gh_bucket{le="+Inf"} 1 +# TYPE hhh histogram +hhh_bucket{le="+Inf"} 1 # {id="histogram-bucket-test"} 4 +hhh_count 1 # {id="histogram-count-test"} 4 +# TYPE ggh gaugehistogram +ggh_bucket{le="+Inf"} 1 # {id="gaugehistogram-bucket-test",xx="yy"} 4 123.123 +ggh_count 1 # {id="gaugehistogram-count-test",xx="yy"} 4 123.123 +# TYPE smr_seconds summary +smr_seconds_count 2.0 # {id="summary-count-test"} 1 123.321 +smr_seconds_sum 42.0 # {id="summary-sum-test"} 1 123.321 +# TYPE ii info +ii{foo="bar"} 1 +# TYPE ss stateset +ss{ss="foo"} 1 +ss{ss="bar"} 0 +ss{A="a"} 0 +# TYPE un unknown +_metric_starting_with_underscore 1 +testmetric{_label_starting_with_underscore="foo"} 1 +testmetric{label="\"bar\""} 1 +# HELP foo Counter with and without labels to certify CT is parsed for both cases +# TYPE foo counter +foo_total 17.0 1520879607.789 # {id="counter-test"} 5 +foo_created 1520872607.123 +foo_total{a="b"} 17.0 1520879607.789 # {id="counter-test"} 5 +foo_created{a="b"} 1520872607.123 +# HELP bar Summary with CT at the end, making sure we find CT even if it's multiple lines a far +# TYPE bar summary +bar_count 17.0 +bar_sum 324789.3 +bar{quantile="0.95"} 123.7 +bar{quantile="0.99"} 150.0 +bar_created 1520872608.124 +# HELP baz Histogram with the same objective as above's summary +# TYPE baz histogram +baz_bucket{le="0.0"} 0 +baz_bucket{le="+Inf"} 17 +baz_count 17 +baz_sum 324789.3 +baz_created 1520872609.125 +# HELP fizz_created Gauge which shouldn't be parsed as CT +# TYPE fizz_created gauge +fizz_created 17.0 +# HELP something Histogram with _created between buckets and summary +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_created 1520430001 +something_bucket{le="0.0"} 1 +something_bucket{le="+Inf"} 18 +something_count{a="b"} 9 +something_sum{a="b"} 42123.0 +something_bucket{a="b",le="0.0"} 8 +something_bucket{a="b",le="+Inf"} 9 +something_created{a="b"} 1520430002 +# HELP yum Summary with _created between sum and quantiles +# TYPE yum summary +yum_count 20 +yum_sum 324789.5 +yum_created 1520430003 +yum{quantile="0.95"} 123.7 +yum{quantile="0.99"} 150.0 +# HELP foobar Summary with _created as the first line +# TYPE foobar summary +foobar_count 21 +foobar_created 1520430004 +foobar_sum 324789.6 +foobar{quantile="0.95"} 123.8 +foobar{quantile="0.99"} 150.1` + + input += "\n# HELP metric foo\x00bar" + input += "\nnull_byte_metric{a=\"abc\x00\"} 1" + input += "\n# EOF\n" + + exp := []parsedEntry{ + { + m: "go_gc_duration_seconds", + help: "A summary of the GC invocation durations.", + }, { + m: "go_gc_duration_seconds", + typ: model.MetricTypeSummary, + }, { + m: "go_gc_duration_seconds", + unit: "seconds", + }, { + m: `go_gc_duration_seconds{quantile="0"}`, + v: 4.9351e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), + }, { + m: `go_gc_duration_seconds{quantile="0.25"}`, + v: 7.424100000000001e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.25"), + }, { + m: `go_gc_duration_seconds{quantile="0.5",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.5", "a", "b"), + }, { + m: "nohelp1", + help: "", + }, { + m: "help2", + help: "escape \\ \n \\ \" \\x chars", + }, { + m: "nounit", + unit: "", + }, { + m: `go_gc_duration_seconds{quantile="1.0",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + m: `go_gc_duration_seconds_count`, + v: 99, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds_count"), + }, { + m: `some:aggregate:rate5m{a_b="c"}`, + v: 1, + lset: labels.FromStrings("__name__", "some:aggregate:rate5m", "a_b", "c"), + }, { + m: "go_goroutines", + help: "Number of goroutines that currently exist.", + }, { + m: "go_goroutines", + typ: model.MetricTypeGauge, + }, { + m: `go_goroutines`, + v: 33, + t: int64p(123123), + lset: labels.FromStrings("__name__", "go_goroutines"), + }, { + m: "hh", + typ: model.MetricTypeHistogram, + }, { + m: `hh{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1, + Sum: 0.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + // Custom values are empty as we do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "hh"), + }, { + m: "gh", + typ: model.MetricTypeGaugeHistogram, + }, { + m: `gh_bucket{le="+Inf"}`, + v: 1, + lset: labels.FromStrings("__name__", "gh_bucket", "le", "+Inf"), + }, { + m: "hhh", + typ: model.MetricTypeHistogram, + }, { + m: `hhh{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1, + Sum: 0.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + // Custom values are empty as we do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "hhh"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "histogram-bucket-test"), Value: 4}, + {Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4}, + }, + }, { + m: "ggh", + typ: model.MetricTypeGaugeHistogram, + }, { + m: `ggh_bucket{le="+Inf"}`, + v: 1, + lset: labels.FromStrings("__name__", "ggh_bucket", "le", "+Inf"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "gaugehistogram-bucket-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}}, + }, { + m: `ggh_count`, + v: 1, + lset: labels.FromStrings("__name__", "ggh_count"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}}, + }, { + m: "smr_seconds", + typ: model.MetricTypeSummary, + }, { + m: `smr_seconds_count`, + v: 2, + lset: labels.FromStrings("__name__", "smr_seconds_count"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "summary-count-test"), Value: 1, HasTs: true, Ts: 123321}}, + }, { + m: `smr_seconds_sum`, + v: 42, + lset: labels.FromStrings("__name__", "smr_seconds_sum"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321}}, + }, { + m: "ii", + typ: model.MetricTypeInfo, + }, { + m: `ii{foo="bar"}`, + v: 1, + lset: labels.FromStrings("__name__", "ii", "foo", "bar"), + }, { + m: "ss", + typ: model.MetricTypeStateset, + }, { + m: `ss{ss="foo"}`, + v: 1, + lset: labels.FromStrings("__name__", "ss", "ss", "foo"), + }, { + m: `ss{ss="bar"}`, + v: 0, + lset: labels.FromStrings("__name__", "ss", "ss", "bar"), + }, { + m: `ss{A="a"}`, + v: 0, + lset: labels.FromStrings("A", "a", "__name__", "ss"), + }, { + m: "un", + typ: model.MetricTypeUnknown, + }, { + m: "_metric_starting_with_underscore", + v: 1, + lset: labels.FromStrings("__name__", "_metric_starting_with_underscore"), + }, { + m: "testmetric{_label_starting_with_underscore=\"foo\"}", + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "_label_starting_with_underscore", "foo"), + }, { + m: "testmetric{label=\"\\\"bar\\\"\"}", + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`), + }, { + m: "foo", + help: "Counter with and without labels to certify CT is parsed for both cases", + }, { + m: "foo", + typ: model.MetricTypeCounter, + }, { + m: "foo_total", + v: 17, + lset: labels.FromStrings("__name__", "foo_total"), + t: int64p(1520879607789), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "counter-test"), Value: 5}}, + ct: int64p(1520872607123), + }, { + m: `foo_total{a="b"}`, + v: 17.0, + lset: labels.FromStrings("__name__", "foo_total", "a", "b"), + t: int64p(1520879607789), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "counter-test"), Value: 5}}, + ct: int64p(1520872607123), + }, { + m: "bar", + help: "Summary with CT at the end, making sure we find CT even if it's multiple lines a far", + }, { + m: "bar", + typ: model.MetricTypeSummary, + }, { + m: "bar_count", + v: 17.0, + lset: labels.FromStrings("__name__", "bar_count"), + ct: int64p(1520872608124), + }, { + m: "bar_sum", + v: 324789.3, + lset: labels.FromStrings("__name__", "bar_sum"), + ct: int64p(1520872608124), + }, { + m: `bar{quantile="0.95"}`, + v: 123.7, + lset: labels.FromStrings("__name__", "bar", "quantile", "0.95"), + ct: int64p(1520872608124), + }, { + m: `bar{quantile="0.99"}`, + v: 150.0, + lset: labels.FromStrings("__name__", "bar", "quantile", "0.99"), + ct: int64p(1520872608124), + }, { + m: "baz", + help: "Histogram with the same objective as above's summary", + }, { + m: "baz", + typ: model.MetricTypeHistogram, + }, { + m: `baz{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 17, + Sum: 324789.3, + PositiveSpans: []histogram.Span{{Offset: 1, Length: 1}}, // The first bucket has 0 count so we don't store it and Offset is 1. + PositiveBuckets: []int64{17}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "baz"), + ct: int64p(1520872609125), + }, { + m: "fizz_created", + help: "Gauge which shouldn't be parsed as CT", + }, { + m: "fizz_created", + typ: model.MetricTypeGauge, + }, { + m: `fizz_created`, + v: 17, + lset: labels.FromStrings("__name__", "fizz_created"), + }, { + m: "something", + help: "Histogram with _created between buckets and summary", + }, { + m: "something", + typ: model.MetricTypeHistogram, + }, { + m: `something{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 18, + Sum: 324789.4, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, + PositiveBuckets: []int64{1, 16}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something"), + ct: int64p(1520430001000), + }, { + m: `something{a="b"}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 9, + Sum: 42123.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, + PositiveBuckets: []int64{8, -7}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something", "a", "b"), + ct: int64p(1520430002000), + }, { + m: "yum", + help: "Summary with _created between sum and quantiles", + }, { + m: "yum", + typ: model.MetricTypeSummary, + }, { + m: `yum_count`, + v: 20, + lset: labels.FromStrings("__name__", "yum_count"), + ct: int64p(1520430003000), + }, { + m: `yum_sum`, + v: 324789.5, + lset: labels.FromStrings("__name__", "yum_sum"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.95"}`, + v: 123.7, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.95"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.99"}`, + v: 150.0, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.99"), + ct: int64p(1520430003000), + }, { + m: "foobar", + help: "Summary with _created as the first line", + }, { + m: "foobar", + typ: model.MetricTypeSummary, + }, { + m: `foobar_count`, + v: 21, + lset: labels.FromStrings("__name__", "foobar_count"), + ct: int64p(1520430004000), + }, { + m: `foobar_sum`, + v: 324789.6, + lset: labels.FromStrings("__name__", "foobar_sum"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.95"}`, + v: 123.8, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.95"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.99"}`, + v: 150.1, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.99"), + ct: int64p(1520430004000), + }, { + m: "metric", + help: "foo\x00bar", + }, { + m: "null_byte_metric{a=\"abc\x00\"}", + v: 1, + lset: labels.FromStrings("__name__", "null_byte_metric", "a", "abc\x00"), + }, + } + + p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + p = NewNHCBParser(p, labels.NewSymbolTable(), false) + got := testParse(t, p) + requireEntries(t, exp, got) +} + +func TestNHCBParserOMParser_MultipleHistograms(t *testing.T) { + // The input is taken originally from TestOpenMetricsParse, with additional tests for the NHCBParser. + + input := `# HELP something Histogram with _created between buckets and summary +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_bucket{le="0.0"} 1 # {id="something-test"} -2.0 +something_bucket{le="1.0"} 16 # {id="something-test"} 0.5 +something_bucket{le="+Inf"} 18 # {id="something-test"} 8 +something_count{a="b"} 9 +something_sum{a="b"} 42123.0 +something_bucket{a="b",le="0.0"} 8 # {id="something-test"} 0.0 123.321 +something_bucket{a="b",le="1.0"} 8 +something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000 +# EOF +` + + exp := []parsedEntry{ + { + m: "something", + help: "Histogram with _created between buckets and summary", + }, { + m: "something", + typ: model.MetricTypeHistogram, + }, { + m: `something{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 18, + Sum: 324789.4, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 3}}, + PositiveBuckets: []int64{1, 14, -13}, + CustomValues: []float64{0.0, 1.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "something-test"), Value: -2.0}, + {Labels: labels.FromStrings("id", "something-test"), Value: 0.5}, + {Labels: labels.FromStrings("id", "something-test"), Value: 8.0}, + }, + }, { + m: `something{a="b"}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 9, + Sum: 42123.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 3}}, + PositiveBuckets: []int64{8, -8, 1}, + CustomValues: []float64{0.0, 1.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something", "a", "b"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "something-test"), Value: 0.0, HasTs: true, Ts: 123321}, + {Labels: labels.FromStrings("id", "something-test"), Value: 2e100, HasTs: true, Ts: 123000}, + }, + }, + } + + p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + + p = NewNHCBParser(p, labels.NewSymbolTable(), false) + got := testParse(t, p) + requireEntries(t, exp, got) +} + +// Verify the requirement tables from +// https://github.com/prometheus/prometheus/issues/13532 . +// "classic" means the option "always_scrape_classic_histograms". +// "nhcb" means the option "convert_classic_histograms_to_nhcb". +// +// Case 1. Only classic histogram is exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | YES | NO | NO |. +// | classic=true, nhcb=false | YES | NO | NO |. +// | classic=false, nhcb=true | NO | NO | YES |. +// | classic=true, nhcb=true | YES | NO | YES |. +// +// Case 2. Both classic and exponential histograms are exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | NO | YES | NO |. +// | classic=true, nhcb=false | YES | YES | NO |. +// | classic=false, nhcb=true | NO | YES | NO |. +// | classic=true, nhcb=true | YES | YES | NO |. +// +// Case 3. Only exponential histogram is exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | NO | YES | NO |. +// | classic=true, nhcb=false | NO | YES | NO |. +// | classic=false, nhcb=true | NO | YES | NO |. +// | classic=true, nhcb=true | NO | YES | NO |. +func TestNHCBParser_NoNHCBWhenExponential(t *testing.T) { + type requirement struct { + expectClassic bool + expectExponential bool + expectNHCB bool + } + + cases := []map[string]requirement{ + // Case 1. + { + "classic=false, nhcb=false": {expectClassic: true, expectExponential: false, expectNHCB: false}, + "classic=true, nhcb=false": {expectClassic: true, expectExponential: false, expectNHCB: false}, + "classic=false, nhcb=true": {expectClassic: false, expectExponential: false, expectNHCB: true}, + "classic=true, nhcb=true": {expectClassic: true, expectExponential: false, expectNHCB: true}, + }, + // Case 2. + { + "classic=false, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=false": {expectClassic: true, expectExponential: true, expectNHCB: false}, + "classic=false, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=true": {expectClassic: true, expectExponential: true, expectNHCB: false}, + }, + // Case 3. + { + "classic=false, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=false, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false}, + }, + } + + // Create parser from keep classic option. + type parserFactory func(bool) Parser + + type testCase struct { + name string + parser parserFactory + classic bool + nhcb bool + exp []parsedEntry + } + + type parserOptions struct { + useUTF8sep bool + hasCreatedTimeStamp bool + } + // Defines the parser name, the Parser factory and the test cases + // supported by the parser and parser options. + parsers := []func() (string, parserFactory, []int, parserOptions){ + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + inputBuf := createTestProtoBufHistogram(t) + return NewProtobufParser(inputBuf.Bytes(), keepClassic, labels.NewSymbolTable()) + } + return "ProtoBuf", factory, []int{1, 2, 3}, parserOptions{useUTF8sep: true, hasCreatedTimeStamp: true} + }, + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + input := createTestOpenMetricsHistogram() + return NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + } + return "OpenMetrics", factory, []int{1}, parserOptions{hasCreatedTimeStamp: true} + }, + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + input := createTestPromHistogram() + return NewPromParser([]byte(input), labels.NewSymbolTable()) + } + return "Prometheus", factory, []int{1}, parserOptions{} + }, + } + + testCases := []testCase{} + for _, parser := range parsers { + for _, classic := range []bool{false, true} { + for _, nhcb := range []bool{false, true} { + parserName, parser, supportedCases, options := parser() + requirementName := "classic=" + strconv.FormatBool(classic) + ", nhcb=" + strconv.FormatBool(nhcb) + tc := testCase{ + name: "parser=" + parserName + ", " + requirementName, + parser: parser, + classic: classic, + nhcb: nhcb, + exp: []parsedEntry{}, + } + for _, caseNumber := range supportedCases { + caseI := cases[caseNumber-1] + req, ok := caseI[requirementName] + require.True(t, ok, "Case %d does not have requirement %s", caseNumber, requirementName) + metric := "test_histogram" + strconv.Itoa(caseNumber) + tc.exp = append(tc.exp, parsedEntry{ + m: metric, + help: "Test histogram " + strconv.Itoa(caseNumber), + }) + tc.exp = append(tc.exp, parsedEntry{ + m: metric, + typ: model.MetricTypeHistogram, + }) + + var ct *int64 + if options.hasCreatedTimeStamp { + ct = int64p(1000) + } + + var bucketForMetric func(string) string + if options.useUTF8sep { + bucketForMetric = func(s string) string { + return "_bucket\xffle\xff" + s + } + } else { + bucketForMetric = func(s string) string { + return "_bucket{le=\"" + s + "\"}" + } + } + + if req.expectExponential { + // Always expect exponential histogram first. + exponentialSeries := []parsedEntry{ + { + m: metric, + shs: &histogram.Histogram{ + Schema: 3, + Count: 175, + Sum: 0.0008280461746287094, + ZeroThreshold: 2.938735877055719e-39, + ZeroCount: 2, + PositiveSpans: []histogram.Span{{Offset: -161, Length: 1}, {Offset: 8, Length: 3}}, + NegativeSpans: []histogram.Span{{Offset: -162, Length: 1}, {Offset: 23, Length: 4}}, + PositiveBuckets: []int64{1, 2, -1, -1}, + NegativeBuckets: []int64{1, 3, -2, -1, 1}, + }, + lset: labels.FromStrings("__name__", metric), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, exponentialSeries...) + } + if req.expectClassic { + // Always expect classic histogram series after exponential. + classicSeries := []parsedEntry{ + { + m: metric + "_count", + v: 175, + lset: labels.FromStrings("__name__", metric+"_count"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + "_sum", + v: 0.0008280461746287094, + lset: labels.FromStrings("__name__", metric+"_sum"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0004899999999999998"), + v: 2, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0004899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0003899999999999998"), + v: 4, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0003899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0002899999999999998"), + v: 16, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0002899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("+Inf"), + v: 175, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "+Inf"), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, classicSeries...) + } + if req.expectNHCB { + // Always expect NHCB series after classic. + nhcbSeries := []parsedEntry{ + { + m: metric + "{}", + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 175, + Sum: 0.0008280461746287094, + PositiveSpans: []histogram.Span{{Length: 4}}, + PositiveBuckets: []int64{2, 0, 10, 147}, + CustomValues: []float64{-0.0004899999999999998, -0.0003899999999999998, -0.0002899999999999998}, + }, + lset: labels.FromStrings("__name__", metric), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, nhcbSeries...) + } + } + testCases = append(testCases, tc) + } + } + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := tc.parser(tc.classic) + if tc.nhcb { + p = NewNHCBParser(p, labels.NewSymbolTable(), tc.classic) + } + got := testParse(t, p) + requireEntries(t, tc.exp, got) + }) + } +} + +func createTestProtoBufHistogram(t *testing.T) *bytes.Buffer { + testMetricFamilies := []string{`name: "test_histogram1" +help: "Test histogram 1" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + > + > + timestamp_ms: 1234568 +>`, `name: "test_histogram2" +help: "Test histogram 2" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + > + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + > + timestamp_ms: 1234568 +>`, `name: "test_histogram3" +help: "Test histogram 3" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + > + timestamp_ms: 1234568 +> +`} + + varintBuf := make([]byte, binary.MaxVarintLen32) + buf := &bytes.Buffer{} + + for _, tmf := range testMetricFamilies { + pb := &dto.MetricFamily{} + // From text to proto message. + require.NoError(t, proto.UnmarshalText(tmf, pb)) + // From proto message to binary protobuf. + protoBuf, err := proto.Marshal(pb) + require.NoError(t, err) + + // Write first length, then binary protobuf. + varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + buf.Write(varintBuf[:varintLength]) + buf.Write(protoBuf) + } + + return buf +} + +func createTestOpenMetricsHistogram() string { + return `# HELP test_histogram1 Test histogram 1 +# TYPE test_histogram1 histogram +test_histogram1_count 175 1234.568 +test_histogram1_sum 0.0008280461746287094 1234.568 +test_histogram1_bucket{le="-0.0004899999999999998"} 2 1234.568 +test_histogram1_bucket{le="-0.0003899999999999998"} 4 1234.568 +test_histogram1_bucket{le="-0.0002899999999999998"} 16 1234.568 +test_histogram1_bucket{le="+Inf"} 175 1234.568 +test_histogram1_created 1 +# EOF` +} + +func createTestPromHistogram() string { + return `# HELP test_histogram1 Test histogram 1 +# TYPE test_histogram1 histogram +test_histogram1_count 175 1234568 +test_histogram1_sum 0.0008280461746287094 1234768 +test_histogram1_bucket{le="-0.0004899999999999998"} 2 1234568 +test_histogram1_bucket{le="-0.0003899999999999998"} 4 1234568 +test_histogram1_bucket{le="-0.0002899999999999998"} 16 1234568 +test_histogram1_bucket{le="+Inf"} 175 1234568` +} + +func TestNHCBParserErrorHandling(t *testing.T) { + input := `# HELP something Histogram with non cumulative buckets +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_created 1520430001 +something_bucket{le="0.0"} 18 +something_bucket{le="+Inf"} 1 +something_count{a="b"} 9 +something_sum{a="b"} 42123 +something_created{a="b"} 1520430002 +something_bucket{a="b",le="0.0"} 1 +something_bucket{a="b",le="+Inf"} 9 +# EOF` + exp := []parsedEntry{ + { + m: "something", + help: "Histogram with non cumulative buckets", + }, + { + m: "something", + typ: model.MetricTypeHistogram, + }, + // The parser should skip the series with non-cumulative buckets. + { + m: `something{a="b"}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 9, + Sum: 42123.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, + PositiveBuckets: []int64{1, 7}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something", "a", "b"), + ct: int64p(1520430002000), + }, + } + + p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + p = NewNHCBParser(p, labels.NewSymbolTable(), false) + got := testParse(t, p) + requireEntries(t, exp, got) +} diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index 638e9619c..3ae9c7ddf 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -22,6 +22,7 @@ import ( "fmt" "io" "math" + "strconv" "strings" "unicode/utf8" @@ -101,6 +102,8 @@ type OpenMetricsParser struct { // Created timestamp parsing state. ct int64 ctHashSet uint64 + // ignoreExemplar instructs the parser to not overwrite exemplars (to keep them while peeking ahead). + ignoreExemplar bool // visitedMFName is the metric family name of the last visited metric when peeking ahead // for _created series during the execution of the CreatedTimestamp method. visitedMFName []byte @@ -210,7 +213,7 @@ func (p *OpenMetricsParser) Metric(l *labels.Labels) string { label := unreplace(s[a:b]) c := p.offsets[i+2] - p.start d := p.offsets[i+3] - p.start - value := unreplace(s[c:d]) + value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d])) p.builder.Add(label, value) } @@ -295,6 +298,14 @@ func (p *OpenMetricsParser) CreatedTimestamp() *int64 { p.skipCTSeries = false + p.ignoreExemplar = true + savedStart := p.start + defer func() { + p.ignoreExemplar = false + p.start = savedStart + p.l = resetLexer + }() + for { eType, err := p.Next() if err != nil { @@ -302,12 +313,12 @@ func (p *OpenMetricsParser) CreatedTimestamp() *int64 { // This might result in partial scrape with wrong/missing CT, but only // spec improvement would help. // TODO: Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. - p.resetCTParseValues(resetLexer) + p.resetCTParseValues() return nil } if eType != EntrySeries { // Assume we hit different family, no CT line found. - p.resetCTParseValues(resetLexer) + p.resetCTParseValues() return nil } @@ -321,14 +332,14 @@ func (p *OpenMetricsParser) CreatedTimestamp() *int64 { peekedHash := p.seriesHash(&buf, peekedName[:len(peekedName)-8]) if peekedHash != currHash { // Found CT line for a different series, for our series no CT. - p.resetCTParseValues(resetLexer) + p.resetCTParseValues() return nil } // All timestamps in OpenMetrics are Unix Epoch in seconds. Convert to milliseconds. // https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#timestamps ct := int64(p.val * 1000.0) - p.setCTParseValues(ct, currHash, currName, true, resetLexer) + p.setCTParseValues(ct, currHash, currName, true) return &ct } } @@ -370,17 +381,15 @@ func (p *OpenMetricsParser) seriesHash(offsetsArr *[]byte, metricFamilyName []by // setCTParseValues sets the parser to the state after CreatedTimestamp method was called and CT was found. // This is useful to prevent re-parsing the same series again and early return the CT value. -func (p *OpenMetricsParser) setCTParseValues(ct int64, ctHashSet uint64, mfName []byte, skipCTSeries bool, resetLexer *openMetricsLexer) { +func (p *OpenMetricsParser) setCTParseValues(ct int64, ctHashSet uint64, mfName []byte, skipCTSeries bool) { p.ct = ct - p.l = resetLexer p.ctHashSet = ctHashSet p.visitedMFName = mfName p.skipCTSeries = skipCTSeries // Do we need to set it? } // resetCtParseValues resets the parser to the state before CreatedTimestamp method was called. -func (p *OpenMetricsParser) resetCTParseValues(resetLexer *openMetricsLexer) { - p.l = resetLexer +func (p *OpenMetricsParser) resetCTParseValues() { p.ctHashSet = 0 p.skipCTSeries = true } @@ -416,10 +425,12 @@ func (p *OpenMetricsParser) Next() (Entry, error) { p.start = p.l.i p.offsets = p.offsets[:0] - p.eOffsets = p.eOffsets[:0] - p.exemplar = p.exemplar[:0] - p.exemplarVal = 0 - p.hasExemplarTs = false + if !p.ignoreExemplar { + p.eOffsets = p.eOffsets[:0] + p.exemplar = p.exemplar[:0] + p.exemplarVal = 0 + p.hasExemplarTs = false + } switch t := p.nextToken(); t { case tEOFWord: @@ -544,6 +555,16 @@ func (p *OpenMetricsParser) Next() (Entry, error) { func (p *OpenMetricsParser) parseComment() error { var err error + + if p.ignoreExemplar { + for t := p.nextToken(); t != tLinebreak; t = p.nextToken() { + if t == tEOF { + return errors.New("data does not end with # EOF") + } + } + return nil + } + // Parse the labels. p.eOffsets, err = p.parseLVals(p.eOffsets, true) if err != nil { @@ -723,3 +744,15 @@ func (p *OpenMetricsParser) getFloatValue(t token, after string) (float64, error } return val, nil } + +// normalizeFloatsInLabelValues ensures that values of the "le" labels of classic histograms and "quantile" labels +// of summaries follow OpenMetrics formatting rules. +func normalizeFloatsInLabelValues(t model.MetricType, l, v string) string { + if (t == model.MetricTypeSummary && l == model.QuantileLabel) || (t == model.MetricTypeHistogram && l == model.BucketLabel) { + f, err := strconv.ParseFloat(v, 64) + if err == nil { + return formatOpenMetricsFloat(f) + } + } + return v +} diff --git a/model/textparse/openmetricsparse_test.go b/model/textparse/openmetricsparse_test.go index 467a23771..9c3c679ab 100644 --- a/model/textparse/openmetricsparse_test.go +++ b/model/textparse/openmetricsparse_test.go @@ -74,6 +74,7 @@ foo_total{a="b"} 17.0 1520879607.789 # {id="counter-test"} 5 foo_created{a="b"} 1520872607.123 foo_total{le="c"} 21.0 foo_created{le="c"} 1520872621.123 +foo_total{le="1"} 10.0 # HELP bar Summary with CT at the end, making sure we find CT even if it's multiple lines a far # TYPE bar summary bar_count 17.0 @@ -97,6 +98,7 @@ something_count 18 something_sum 324789.4 something_created 1520430001 something_bucket{le="0.0"} 1 +something_bucket{le="1"} 2 something_bucket{le="+Inf"} 18 # HELP yum Summary with _created between sum and quantiles # TYPE yum summary @@ -130,7 +132,7 @@ foobar{quantile="0.99"} 150.1` }, { m: `go_gc_duration_seconds{quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), }, { m: `go_gc_duration_seconds{quantile="0.25"}`, v: 7.424100000000001e-05, @@ -302,6 +304,10 @@ foobar{quantile="0.99"} 150.1` v: 21.0, lset: labels.FromStrings("__name__", "foo_total", "le", "c"), ct: int64p(1520872621123), + }, { + m: `foo_total{le="1"}`, + v: 10.0, + lset: labels.FromStrings("__name__", "foo_total", "le", "1"), }, { m: "bar", help: "Summary with CT at the end, making sure we find CT even if it's multiple lines a far", @@ -385,6 +391,11 @@ foobar{quantile="0.99"} 150.1` v: 1, lset: labels.FromStrings("__name__", "something_bucket", "le", "0.0"), ct: int64p(1520430001000), + }, { + m: `something_bucket{le="1"}`, + v: 2, + lset: labels.FromStrings("__name__", "something_bucket", "le", "1.0"), + ct: int64p(1520430001000), }, { m: `something_bucket{le="+Inf"}`, v: 18, @@ -492,7 +503,7 @@ func TestUTF8OpenMetricsParse(t *testing.T) { }, { m: `{"go.gc_duration_seconds",quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.0"), ct: int64p(1520872607123), }, { m: `{"go.gc_duration_seconds",quantile="0.25"}`, diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index 575976927..17b0c3db8 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -239,7 +239,8 @@ func (p *PromParser) Metric(l *labels.Labels) string { label := unreplace(s[a:b]) c := p.offsets[i+2] - p.start d := p.offsets[i+3] - p.start - value := unreplace(s[c:d]) + value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d])) + p.builder.Add(label, value) } @@ -508,7 +509,7 @@ func yoloString(b []byte) string { func parseFloat(s string) (float64, error) { // Keep to pre-Go 1.13 float formats. if strings.ContainsAny(s, "pP_") { - return 0, fmt.Errorf("unsupported character in float") + return 0, errors.New("unsupported character in float") } return strconv.ParseFloat(s, 64) } diff --git a/model/textparse/promparse_test.go b/model/textparse/promparse_test.go index b726d8847..e8cf66f53 100644 --- a/model/textparse/promparse_test.go +++ b/model/textparse/promparse_test.go @@ -31,6 +31,13 @@ go_gc_duration_seconds{quantile="0.25",} 7.424100000000001e-05 go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05 go_gc_duration_seconds{quantile="0.8", a="b"} 8.3835e-05 go_gc_duration_seconds{ quantile="0.9", a="b"} 8.3835e-05 +# HELP prometheus_http_request_duration_seconds Histogram of latencies for HTTP requests. +# TYPE prometheus_http_request_duration_seconds histogram +prometheus_http_request_duration_seconds_bucket{handler="/",le="1"} 423 +prometheus_http_request_duration_seconds_bucket{handler="/",le="2"} 1423 +prometheus_http_request_duration_seconds_bucket{handler="/",le="+Inf"} 1423 +prometheus_http_request_duration_seconds_sum{handler="/"} 2000 +prometheus_http_request_duration_seconds_count{handler="/"} 1423 # Hrandom comment starting with prefix of HELP # wind_speed{A="2",c="3"} 12345 @@ -50,7 +57,8 @@ some:aggregate:rate5m{a_b="c"} 1 go_goroutines 33 123123 _metric_starting_with_underscore 1 testmetric{_label_starting_with_underscore="foo"} 1 -testmetric{label="\"bar\""} 1` +testmetric{label="\"bar\""} 1 +testmetric{le="10"} 1` input += "\n# HELP metric foo\x00bar" input += "\nnull_byte_metric{a=\"abc\x00\"} 1" @@ -64,7 +72,7 @@ testmetric{label="\"bar\""} 1` }, { m: `go_gc_duration_seconds{quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), }, { m: `go_gc_duration_seconds{quantile="0.25",}`, v: 7.424100000000001e-05, @@ -81,6 +89,32 @@ testmetric{label="\"bar\""} 1` m: `go_gc_duration_seconds{ quantile="0.9", a="b"}`, v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.9", "a", "b"), + }, { + m: "prometheus_http_request_duration_seconds", + help: "Histogram of latencies for HTTP requests.", + }, { + m: "prometheus_http_request_duration_seconds", + typ: model.MetricTypeHistogram, + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="1"}`, + v: 423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "1.0"), + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="2"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "2.0"), + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="+Inf"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "+Inf"), + }, { + m: `prometheus_http_request_duration_seconds_sum{handler="/"}`, + v: 2000, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_sum", "handler", "/"), + }, { + m: `prometheus_http_request_duration_seconds_count{handler="/"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_count", "handler", "/"), }, { comment: "# Hrandom comment starting with prefix of HELP", }, { @@ -151,6 +185,10 @@ testmetric{label="\"bar\""} 1` m: "testmetric{label=\"\\\"bar\\\"\"}", v: 1, lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`), + }, { + m: `testmetric{le="10"}`, + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "le", "10"), }, { m: "metric", help: "foo\x00bar", @@ -197,7 +235,7 @@ func TestUTF8PromParse(t *testing.T) { }, { m: `{"go.gc_duration_seconds",quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.0"), }, { m: `{"go.gc_duration_seconds",quantile="0.25",}`, v: 7.424100000000001e-05, diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index b3dfdfca1..a77e1d728 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -20,7 +20,9 @@ import ( "fmt" "io" "math" + "strconv" "strings" + "sync" "unicode/utf8" "github.com/gogo/protobuf/proto" @@ -34,6 +36,15 @@ import ( dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" ) +// floatFormatBufPool is exclusively used in formatOpenMetricsFloat. +var floatFormatBufPool = sync.Pool{ + New: func() interface{} { + // To contain at most 17 digits and additional syntax for a float64. + b := make([]byte, 0, 24) + return &b + }, +} + // ProtobufParser is a very inefficient way of unmarshaling the old Prometheus // protobuf format and then present it as it if were parsed by a // Prometheus-2-style text parser. This is only done so that we can easily plug @@ -629,11 +640,15 @@ func formatOpenMetricsFloat(f float64) string { case math.IsInf(f, -1): return "-Inf" } - s := fmt.Sprint(f) - if strings.ContainsAny(s, "e.") { - return s + bp := floatFormatBufPool.Get().(*[]byte) + defer floatFormatBufPool.Put(bp) + + *bp = strconv.AppendFloat((*bp)[:0], f, 'g', -1, 64) + if bytes.ContainsAny(*bp, "e.") { + return string(*bp) } - return s + ".0" + *bp = append(*bp, '.', '0') + return string(*bp) } // isNativeHistogram returns false iff the provided histograms has no spans at diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index 0c09279fe..065459a69 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -409,6 +409,49 @@ metric: < > > +`, + `name: "test_histogram3" +help: "Similar histogram as before but now with integer buckets." +type: HISTOGRAM +metric: < + histogram: < + sample_count: 6 + sample_sum: 50 + bucket: < + cumulative_count: 2 + upper_bound: -20 + > + bucket: < + cumulative_count: 4 + upper_bound: 20 + exemplar: < + label: < + name: "dummyID" + value: "59727" + > + value: 15 + timestamp: < + seconds: 1625851153 + nanos: 146848499 + > + > + > + bucket: < + cumulative_count: 6 + upper_bound: 30 + exemplar: < + label: < + name: "dummyID" + value: "5617" + > + value: 25 + > + > + schema: 0 + zero_threshold: 0 + > +> + `, `name: "test_histogram_family" help: "Test histogram metric family with two very simple histograms." @@ -1050,6 +1093,66 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, + { + m: "test_histogram3", + help: "Similar histogram as before but now with integer buckets.", + }, + { + m: "test_histogram3", + typ: model.MetricTypeHistogram, + }, + { + m: "test_histogram3_count", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_count", + ), + }, + { + m: "test_histogram3_sum", + v: 50, + lset: labels.FromStrings( + "__name__", "test_histogram3_sum", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff-20.0", + v: 2, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "-20.0", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff20.0", + v: 4, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "20.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "59727"), Value: 15, HasTs: true, Ts: 1625851153146}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff30.0", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "30.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "5617"), Value: 25, HasTs: false}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff+Inf", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "+Inf", + ), + }, { m: "test_histogram_family", help: "Test histogram metric family with two very simple histograms.", @@ -1857,6 +1960,66 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, + { + m: "test_histogram3", + help: "Similar histogram as before but now with integer buckets.", + }, + { + m: "test_histogram3", + typ: model.MetricTypeHistogram, + }, + { + m: "test_histogram3_count", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_count", + ), + }, + { + m: "test_histogram3_sum", + v: 50, + lset: labels.FromStrings( + "__name__", "test_histogram3_sum", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff-20.0", + v: 2, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "-20.0", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff20.0", + v: 4, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "20.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "59727"), Value: 15, HasTs: true, Ts: 1625851153146}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff30.0", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "30.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "5617"), Value: 25, HasTs: false}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff+Inf", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "+Inf", + ), + }, { m: "test_histogram_family", help: "Test histogram metric family with two very simple histograms.", diff --git a/model/textparse/testdata/omhistogramdata.txt b/model/textparse/testdata/omhistogramdata.txt new file mode 100644 index 000000000..187616835 --- /dev/null +++ b/model/textparse/testdata/omhistogramdata.txt @@ -0,0 +1,45 @@ +# HELP golang_manual_histogram_seconds This is a histogram with manually selected parameters +# TYPE golang_manual_histogram_seconds histogram +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.005"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.01"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.025"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.05"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.1"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.25"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="1.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="2.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="10.0"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="+Inf"} 1 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5001"} 10.0 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5001"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.005"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.01"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.025"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.05"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.1"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.25"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="1.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="2.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="10.0"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="+Inf"} 1 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5002"} 10.0 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5002"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.005"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.01"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.025"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.05"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.1"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.25"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="1.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="2.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="10.0"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="+Inf"} 1 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5003"} 10.0 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5003"} 1 +# EOF \ No newline at end of file diff --git a/notifier/notifier.go b/notifier/notifier.go index 482d2fdaa..e970b67e6 100644 --- a/notifier/notifier.go +++ b/notifier/notifier.go @@ -542,10 +542,10 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { begin := time.Now() - // v1Payload and v2Payload represent 'alerts' marshaled for Alertmanager API - // v1 or v2. Marshaling happens below. Reference here is for caching between + // cachedPayload represent 'alerts' marshaled for Alertmanager API v2. + // Marshaling happens below. Reference here is for caching between // for loop iterations. - var v1Payload, v2Payload []byte + var cachedPayload []byte n.mtx.RLock() amSets := n.alertmanagers @@ -576,29 +576,16 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { continue } // We can't use the cached values from previous iteration. - v1Payload, v2Payload = nil, nil + cachedPayload = nil } switch ams.cfg.APIVersion { - case config.AlertmanagerAPIVersionV1: - { - if v1Payload == nil { - v1Payload, err = json.Marshal(amAlerts) - if err != nil { - n.logger.Error("Encoding alerts for Alertmanager API v1 failed", "err", err) - ams.mtx.RUnlock() - return false - } - } - - payload = v1Payload - } case config.AlertmanagerAPIVersionV2: { - if v2Payload == nil { + if cachedPayload == nil { openAPIAlerts := alertsToOpenAPIAlerts(amAlerts) - v2Payload, err = json.Marshal(openAPIAlerts) + cachedPayload, err = json.Marshal(openAPIAlerts) if err != nil { n.logger.Error("Encoding alerts for Alertmanager API v2 failed", "err", err) ams.mtx.RUnlock() @@ -606,7 +593,7 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { } } - payload = v2Payload + payload = cachedPayload } default: { @@ -621,7 +608,7 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { if len(ams.cfg.AlertRelabelConfigs) > 0 { // We can't use the cached values on the next iteration. - v1Payload, v2Payload = nil, nil + cachedPayload = nil } for _, am := range ams.ams { diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index 83eaf8168..97b0274f2 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -50,27 +50,27 @@ func TestPostPath(t *testing.T) { }{ { in: "", - out: "/api/v1/alerts", + out: "/api/v2/alerts", }, { in: "/", - out: "/api/v1/alerts", + out: "/api/v2/alerts", }, { in: "/prefix", - out: "/prefix/api/v1/alerts", + out: "/prefix/api/v2/alerts", }, { in: "/prefix//", - out: "/prefix/api/v1/alerts", + out: "/prefix/api/v2/alerts", }, { in: "prefix//", - out: "/prefix/api/v1/alerts", + out: "/prefix/api/v2/alerts", }, } for _, c := range cases { - require.Equal(t, c.out, postPath(c.in, config.AlertmanagerAPIVersionV1)) + require.Equal(t, c.out, postPath(c.in, config.AlertmanagerAPIVersionV2)) } } diff --git a/promql/bench_test.go b/promql/bench_test.go index cd6d1190c..943baceec 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -21,6 +21,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" @@ -380,6 +382,126 @@ func BenchmarkNativeHistograms(b *testing.B) { } } +func BenchmarkInfoFunction(b *testing.B) { + // Initialize test storage and generate test series data. + testStorage := teststorage.New(b) + defer testStorage.Close() + + start := time.Unix(0, 0) + end := start.Add(2 * time.Hour) + step := 30 * time.Second + + // Generate time series data for the benchmark. + generateInfoFunctionTestSeries(b, testStorage, 100, 2000, 3600) + + // Define test cases with queries to benchmark. + cases := []struct { + name string + query string + }{ + { + name: "Joining info metrics with other metrics with group_left example 1", + query: "rate(http_server_request_duration_seconds_count[2m]) * on (job, instance) group_left (k8s_cluster_name) target_info{k8s_cluster_name=\"us-east\"}", + }, + { + name: "Joining info metrics with other metrics with info() example 1", + query: `info(rate(http_server_request_duration_seconds_count[2m]), {k8s_cluster_name="us-east"})`, + }, + { + name: "Joining info metrics with other metrics with group_left example 2", + query: "sum by (k8s_cluster_name, http_status_code) (rate(http_server_request_duration_seconds_count[2m]) * on (job, instance) group_left (k8s_cluster_name) target_info)", + }, + { + name: "Joining info metrics with other metrics with info() example 2", + query: `sum by (k8s_cluster_name, http_status_code) (info(rate(http_server_request_duration_seconds_count[2m]), {k8s_cluster_name=~".+"}))`, + }, + } + + // Benchmark each query type. + for _, tc := range cases { + // Initialize the PromQL engine once for all benchmarks. + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 50000000, + Timeout: 100 * time.Second, + EnableAtModifier: true, + EnableNegativeOffset: true, + } + engine := promql.NewEngine(opts) + b.Run(tc.name, func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() // Stop the timer to exclude setup time. + qry, err := engine.NewRangeQuery(context.Background(), testStorage, nil, tc.query, start, end, step) + require.NoError(b, err) + b.StartTimer() + result := qry.Exec(context.Background()) + require.NoError(b, result.Err) + } + }) + } + + // Report allocations. + b.ReportAllocs() +} + +// Helper function to generate target_info and http_server_request_duration_seconds_count series for info function benchmarking. +func generateInfoFunctionTestSeries(tb testing.TB, stor *teststorage.TestStorage, infoSeriesNum, interval, numIntervals int) { + tb.Helper() + + ctx := context.Background() + statusCodes := []string{"200", "400", "500"} + + // Generate target_info metrics with instance and job labels, and k8s_cluster_name label. + // Generate http_server_request_duration_seconds_count metrics with instance and job labels, and http_status_code label. + // the classic target_info metrics is gauge type. + metrics := make([]labels.Labels, 0, infoSeriesNum+len(statusCodes)) + for i := 0; i < infoSeriesNum; i++ { + clusterName := "us-east" + if i >= infoSeriesNum/2 { + clusterName = "eu-south" + } + metrics = append(metrics, labels.FromStrings( + "__name__", "target_info", + "instance", "instance"+strconv.Itoa(i), + "job", "job"+strconv.Itoa(i), + "k8s_cluster_name", clusterName, + )) + } + + for _, statusCode := range statusCodes { + metrics = append(metrics, labels.FromStrings( + "__name__", "http_server_request_duration_seconds_count", + "instance", "instance0", + "job", "job0", + "http_status_code", statusCode, + )) + } + + // Append the generated metrics and samples to the storage. + refs := make([]storage.SeriesRef, len(metrics)) + + for i := 0; i < numIntervals; i++ { + a := stor.Appender(context.Background()) + ts := int64(i * interval) + for j, metric := range metrics[:infoSeriesNum] { + ref, _ := a.Append(refs[j], metric, ts, 1) + refs[j] = ref + } + + for j, metric := range metrics[infoSeriesNum:] { + ref, _ := a.Append(refs[j+infoSeriesNum], metric, ts, float64(i)) + refs[j+infoSeriesNum] = ref + } + + require.NoError(tb, a.Commit()) + } + + stor.DB.ForceHeadMMap() // Ensure we have at most one head chunk for every series. + stor.DB.Compact(ctx) +} + func generateNativeHistogramSeries(app storage.Appender, numSeries int) error { commonLabels := []string{labels.MetricName, "native_histogram_series", "foo", "bar"} series := make([][]*histogram.Histogram, numSeries) diff --git a/promql/engine.go b/promql/engine.go index 60b4b8138..cc7ff694e 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -739,6 +739,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval samplesStats: query.sampleStats, noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ng.enableDelayedNameRemoval, + querier: querier, } query.sampleStats.InitStepTracking(start, start, 1) @@ -797,6 +798,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval samplesStats: query.sampleStats, noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ng.enableDelayedNameRemoval, + querier: querier, } query.sampleStats.InitStepTracking(evaluator.startTimestamp, evaluator.endTimestamp, evaluator.interval) val, warnings, err := evaluator.Eval(ctxInnerEval, s.Expr) @@ -1069,6 +1071,7 @@ type evaluator struct { samplesStats *stats.QuerySamples noStepSubqueryIntervalFn func(rangeMillis int64) int64 enableDelayedNameRemoval bool + querier storage.Querier } // errorf causes a panic with the input formatted into an error. @@ -1230,38 +1233,17 @@ func (ev *evaluator) rangeEval(ctx context.Context, prepSeries func(labels.Label ev.currentSamples = tempNumSamples // Gather input vectors for this timestamp. for i := range exprs { - vectors[i] = vectors[i][:0] - + var bh []EvalSeriesHelper + var sh []EvalSeriesHelper if prepSeries != nil { - bufHelpers[i] = bufHelpers[i][:0] - } - - for si, series := range matrixes[i] { - switch { - case len(series.Floats) > 0 && series.Floats[0].T == ts: - vectors[i] = append(vectors[i], Sample{Metric: series.Metric, F: series.Floats[0].F, T: ts, DropName: series.DropName}) - // Move input vectors forward so we don't have to re-scan the same - // past points at the next step. - matrixes[i][si].Floats = series.Floats[1:] - case len(series.Histograms) > 0 && series.Histograms[0].T == ts: - vectors[i] = append(vectors[i], Sample{Metric: series.Metric, H: series.Histograms[0].H, T: ts, DropName: series.DropName}) - matrixes[i][si].Histograms = series.Histograms[1:] - default: - continue - } - if prepSeries != nil { - bufHelpers[i] = append(bufHelpers[i], seriesHelpers[i][si]) - } - // Don't add histogram size here because we only - // copy the pointer above, not the whole - // histogram. - ev.currentSamples++ - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } + bh = bufHelpers[i][:0] + sh = seriesHelpers[i] } + vectors[i], bh = ev.gatherVector(ts, matrixes[i], vectors[i], bh, sh) args[i] = vectors[i] - ev.samplesStats.UpdatePeak(ev.currentSamples) + if prepSeries != nil { + bufHelpers[i] = bh + } } // Make the function call. @@ -1462,19 +1444,18 @@ func (ev *evaluator) rangeEvalAgg(ctx context.Context, aggExpr *parser.Aggregate return result, warnings } -// evalVectorSelector generates a Matrix between ev.startTimestamp and ev.endTimestamp (inclusive), each point spaced ev.interval apart, from vs. -// vs.Series has to be expanded before calling this method. -// For every series iterator in vs.Series, the method iterates in ev.interval sized steps from ev.startTimestamp until and including ev.endTimestamp, +// evalSeries generates a Matrix between ev.startTimestamp and ev.endTimestamp (inclusive), each point spaced ev.interval apart, from series given offset. +// For every storage.Series iterator in series, the method iterates in ev.interval sized steps from ev.startTimestamp until and including ev.endTimestamp, // collecting every corresponding sample (obtained via ev.vectorSelectorSingle) into a Series. // All of the generated Series are collected into a Matrix, that gets returned. -func (ev *evaluator) evalVectorSelector(ctx context.Context, vs *parser.VectorSelector) Matrix { +func (ev *evaluator) evalSeries(ctx context.Context, series []storage.Series, offset time.Duration, recordOrigT bool) Matrix { numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 - mat := make(Matrix, 0, len(vs.Series)) + mat := make(Matrix, 0, len(series)) var prevSS *Series it := storage.NewMemoizedEmptyIterator(durationMilliseconds(ev.lookbackDelta)) var chkIter chunkenc.Iterator - for _, s := range vs.Series { + for _, s := range series { if err := contextDone(ctx, "expression evaluation"); err != nil { ev.error(err) } @@ -1487,7 +1468,7 @@ func (ev *evaluator) evalVectorSelector(ctx context.Context, vs *parser.VectorSe for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval { step++ - _, f, h, ok := ev.vectorSelectorSingle(it, vs, ts) + origT, f, h, ok := ev.vectorSelectorSingle(it, offset, ts) if !ok { continue } @@ -1501,8 +1482,18 @@ func (ev *evaluator) evalVectorSelector(ctx context.Context, vs *parser.VectorSe if ss.Floats == nil { ss.Floats = reuseOrGetFPointSlices(prevSS, numSteps) } + if recordOrigT { + // This is an info metric, where we want to track the original sample timestamp. + // Info metric values should be 1 by convention, therefore we can re-use this + // space in the sample. + f = float64(origT) + } ss.Floats = append(ss.Floats, FPoint{F: f, T: ts}) } else { + if recordOrigT { + ev.error(fmt.Errorf("this should be an info metric, with float samples: %s", ss.Metric)) + } + point := HPoint{H: h, T: ts} histSize := point.size() ev.currentSamples += histSize @@ -1672,6 +1663,8 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, return ev.evalLabelReplace(ctx, e.Args) case "label_join": return ev.evalLabelJoin(ctx, e.Args) + case "info": + return ev.evalInfo(ctx, e.Args) } if !matrixArg { @@ -1963,7 +1956,7 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, if err != nil { ev.error(errWithWarnings{fmt.Errorf("expanding series: %w", err), ws}) } - mat := ev.evalVectorSelector(ctx, e) + mat := ev.evalSeries(ctx, e.Series, e.Offset, false) return mat, ws case *parser.MatrixSelector: @@ -1984,6 +1977,7 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, samplesStats: ev.samplesStats.NewChild(), noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ev.enableDelayedNameRemoval, + querier: ev.querier, } if e.Step != 0 { @@ -2028,6 +2022,7 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, samplesStats: ev.samplesStats.NewChild(), noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ev.enableDelayedNameRemoval, + querier: ev.querier, } res, ws := newEv.eval(ctx, e.Expr) ev.currentSamples = newEv.currentSamples @@ -2052,7 +2047,7 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, } for i := range mat { if len(mat[i].Floats)+len(mat[i].Histograms) != 1 { - panic(fmt.Errorf("unexpected number of samples")) + panic(errors.New("unexpected number of samples")) } for ts := ev.startTimestamp + ev.interval; ts <= ev.endTimestamp; ts += ev.interval { if len(mat[i].Floats) > 0 { @@ -2128,7 +2123,7 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(ctx context.Co vec := make(Vector, 0, len(vs.Series)) for i, s := range vs.Series { it := seriesIterators[i] - t, _, _, ok := ev.vectorSelectorSingle(it, vs, enh.Ts) + t, _, _, ok := ev.vectorSelectorSingle(it, vs.Offset, enh.Ts) if !ok { continue } @@ -2152,10 +2147,10 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(ctx context.Co } // vectorSelectorSingle evaluates an instant vector for the iterator of one time series. -func (ev *evaluator) vectorSelectorSingle(it *storage.MemoizedSeriesIterator, node *parser.VectorSelector, ts int64) ( +func (ev *evaluator) vectorSelectorSingle(it *storage.MemoizedSeriesIterator, offset time.Duration, ts int64) ( int64, float64, *histogram.FloatHistogram, bool, ) { - refTime := ts - durationMilliseconds(node.Offset) + refTime := ts - durationMilliseconds(offset) var t int64 var v float64 var h *histogram.FloatHistogram @@ -2926,7 +2921,15 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix group.hasHistogram = true } case parser.STDVAR, parser.STDDEV: - group.floatValue = 0 + switch { + case h != nil: + // Ignore histograms for STDVAR and STDDEV. + group.seen = false + case math.IsNaN(f), math.IsInf(f, 0): + group.floatValue = math.NaN() + default: + group.floatValue = 0 + } case parser.QUANTILE: group.heap = make(vectorByValueHeap, 1) group.heap[0] = Sample{F: f} @@ -3623,7 +3626,7 @@ func detectHistogramStatsDecoding(expr parser.Expr) { if n, ok := node.(*parser.BinaryExpr); ok { detectHistogramStatsDecoding(n.LHS) detectHistogramStatsDecoding(n.RHS) - return fmt.Errorf("stop") + return errors.New("stop") } n, ok := (node).(*parser.VectorSelector) @@ -3645,7 +3648,7 @@ func detectHistogramStatsDecoding(expr parser.Expr) { break } } - return fmt.Errorf("stop") + return errors.New("stop") }) } @@ -3716,3 +3719,41 @@ func newHistogramStatsSeries(series storage.Series) *histogramStatsSeries { func (s histogramStatsSeries) Iterator(it chunkenc.Iterator) chunkenc.Iterator { return NewHistogramStatsIterator(s.Series.Iterator(it)) } + +// gatherVector gathers a Vector for ts from the series in input. +// output is used as a buffer. +// If bufHelpers and seriesHelpers are provided, seriesHelpers[i] is appended to bufHelpers for every input index i. +// The gathered Vector and bufHelper are returned. +func (ev *evaluator) gatherVector(ts int64, input Matrix, output Vector, bufHelpers, seriesHelpers []EvalSeriesHelper) (Vector, []EvalSeriesHelper) { + output = output[:0] + for i, series := range input { + switch { + case len(series.Floats) > 0 && series.Floats[0].T == ts: + s := series.Floats[0] + output = append(output, Sample{Metric: series.Metric, F: s.F, T: ts, DropName: series.DropName}) + // Move input vectors forward so we don't have to re-scan the same + // past points at the next step. + input[i].Floats = series.Floats[1:] + case len(series.Histograms) > 0 && series.Histograms[0].T == ts: + s := series.Histograms[0] + output = append(output, Sample{Metric: series.Metric, H: s.H, T: ts, DropName: series.DropName}) + input[i].Histograms = series.Histograms[1:] + default: + continue + } + if len(seriesHelpers) > 0 { + bufHelpers = append(bufHelpers, seriesHelpers[i]) + } + + // Don't add histogram size here because we only + // copy the pointer above, not the whole + // histogram. + ev.currentSamples++ + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + } + ev.samplesStats.UpdatePeak(ev.currentSamples) + + return output, bufHelpers +} diff --git a/promql/engine_test.go b/promql/engine_test.go index 7c398029f..6dbfde6d8 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -3928,3 +3928,65 @@ func (s mockSeries) Iterator(it chunkenc.Iterator) chunkenc.Iterator { } return storage.ChainSampleIteratorFromIterators(it, iterables) } + +func TestEvaluationWithDelayedNameRemovalDisabled(t *testing.T) { + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + EnableAtModifier: true, + MaxSamples: 10000, + Timeout: 10 * time.Second, + EnableDelayedNameRemoval: false, + } + engine := promqltest.NewTestEngineWithOpts(t, opts) + + promqltest.RunTest(t, ` +load 5m + metric{env="1"} 0 60 120 + another_metric{env="1"} 60 120 180 + +# Does not drop __name__ for vector selector +eval instant at 10m metric{env="1"} + metric{env="1"} 120 + +# Drops __name__ for unary operators +eval instant at 10m -metric + {env="1"} -120 + +# Drops __name__ for binary operators +eval instant at 10m metric + another_metric + {env="1"} 300 + +# Does not drop __name__ for binary comparison operators +eval instant at 10m metric <= another_metric + metric{env="1"} 120 + +# Drops __name__ for binary comparison operators with "bool" modifier +eval instant at 10m metric <= bool another_metric + {env="1"} 1 + +# Drops __name__ for vector-scalar operations +eval instant at 10m metric * 2 + {env="1"} 240 + +# Drops __name__ for instant-vector functions +eval instant at 10m clamp(metric, 0, 100) + {env="1"} 100 + +# Drops __name__ for round function +eval instant at 10m round(metric) + {env="1"} 120 + +# Drops __name__ for range-vector functions +eval instant at 10m rate(metric{env="1"}[10m]) + {env="1"} 0.2 + +# Does not drop __name__ for last_over_time function +eval instant at 10m last_over_time(metric{env="1"}[10m]) + metric{env="1"} 120 + +# Drops name for other _over_time functions +eval instant at 10m max_over_time(metric{env="1"}[10m]) + {env="1"} 120 +`, engine) +} diff --git a/promql/functions.go b/promql/functions.go index 26ec31c5f..009a370eb 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -415,22 +415,12 @@ func funcSortDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel // === sort_by_label(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - // First, sort by the full label set. This ensures a consistent ordering in case sorting by the - // labels provided as arguments is not conclusive. + lbls := stringSliceFromArgs(args[1:]) slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - return labels.Compare(a.Metric, b.Metric) - }) - - labels := stringSliceFromArgs(args[1:]) - // Next, sort by the labels provided as arguments. - slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - // Iterate over each given label. - for _, label := range labels { + for _, label := range lbls { lv1 := a.Metric.Get(label) lv2 := b.Metric.Get(label) - // If we encounter multiple samples with the same label values, the sorting which was - // performed in the first step will act as a "tie breaker". if lv1 == lv2 { continue } @@ -442,7 +432,8 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode return +1 } - return 0 + // If all labels provided as arguments were equal, sort by the full label set. This ensures a consistent ordering. + return labels.Compare(a.Metric, b.Metric) }) return vals[0].(Vector), nil @@ -450,22 +441,12 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode // === sort_by_label_desc(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - // First, sort by the full label set. This ensures a consistent ordering in case sorting by the - // labels provided as arguments is not conclusive. + lbls := stringSliceFromArgs(args[1:]) slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - return labels.Compare(b.Metric, a.Metric) - }) - - labels := stringSliceFromArgs(args[1:]) - // Next, sort by the labels provided as arguments. - slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - // Iterate over each given label. - for _, label := range labels { + for _, label := range lbls { lv1 := a.Metric.Get(label) lv2 := b.Metric.Get(label) - // If we encounter multiple samples with the same label values, the sorting which was - // performed in the first step will act as a "tie breaker". if lv1 == lv2 { continue } @@ -477,7 +458,8 @@ func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *Eval return -1 } - return 0 + // If all labels provided as arguments were equal, sort by the full label set. This ensures a consistent ordering. + return -labels.Compare(a.Metric, b.Metric) }) return vals[0].(Vector), nil @@ -563,7 +545,14 @@ func funcRound(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper toNearestInverse := 1.0 / toNearest for _, el := range vec { + if el.H != nil { + // Process only float samples. + continue + } f := math.Floor(el.F*toNearestInverse+0.5) / toNearestInverse + if !enh.enableDelayedNameRemoval { + el.Metric = el.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ Metric: el.Metric, F: f, @@ -1697,6 +1686,7 @@ var FunctionCalls = map[string]FunctionCall{ "hour": funcHour, "idelta": funcIdelta, "increase": funcIncrease, + "info": nil, "irate": funcIrate, "label_replace": nil, // evalLabelReplace not called via this map. "label_join": nil, // evalLabelJoin not called via this map. diff --git a/promql/fuzz.go b/promql/fuzz.go index 57fd1166a..759055fb0 100644 --- a/promql/fuzz.go +++ b/promql/fuzz.go @@ -61,8 +61,8 @@ const ( var symbolTable = labels.NewSymbolTable() func fuzzParseMetricWithContentType(in []byte, contentType string) int { - p, warning := textparse.New(in, contentType, false, false, symbolTable) - if warning != nil { + p, warning := textparse.New(in, contentType, "", false, false, symbolTable) + if p == nil || warning != nil { // An invalid content type is being passed, which should not happen // in this context. panic(warning) @@ -91,7 +91,7 @@ func fuzzParseMetricWithContentType(in []byte, contentType string) int { // Note that this is not the parser for the text-based exposition-format; that // lives in github.com/prometheus/client_golang/text. func FuzzParseMetric(in []byte) int { - return fuzzParseMetricWithContentType(in, "") + return fuzzParseMetricWithContentType(in, "text/plain") } func FuzzParseOpenMetric(in []byte) int { diff --git a/promql/info.go b/promql/info.go new file mode 100644 index 000000000..3fe9a2ce9 --- /dev/null +++ b/promql/info.go @@ -0,0 +1,454 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql + +import ( + "context" + "errors" + "fmt" + "slices" + "strings" + + "github.com/grafana/regexp" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/annotations" +) + +const targetInfo = "target_info" + +// identifyingLabels are the labels we consider as identifying for info metrics. +// Currently hard coded, so we don't need knowledge of individual info metrics. +var identifyingLabels = []string{"instance", "job"} + +// evalInfo implements the info PromQL function. +func (ev *evaluator) evalInfo(ctx context.Context, args parser.Expressions) (parser.Value, annotations.Annotations) { + val, annots := ev.eval(ctx, args[0]) + mat := val.(Matrix) + // Map from data label name to matchers. + dataLabelMatchers := map[string][]*labels.Matcher{} + var infoNameMatchers []*labels.Matcher + if len(args) > 1 { + // TODO: Introduce a dedicated LabelSelector type. + labelSelector := args[1].(*parser.VectorSelector) + for _, m := range labelSelector.LabelMatchers { + dataLabelMatchers[m.Name] = append(dataLabelMatchers[m.Name], m) + if m.Name == labels.MetricName { + infoNameMatchers = append(infoNameMatchers, m) + } + } + } else { + infoNameMatchers = []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, targetInfo)} + } + + // Don't try to enrich info series. + ignoreSeries := map[int]struct{}{} +loop: + for i, s := range mat { + name := s.Metric.Get(labels.MetricName) + for _, m := range infoNameMatchers { + if m.Matches(name) { + ignoreSeries[i] = struct{}{} + continue loop + } + } + } + + selectHints := ev.infoSelectHints(args[0]) + infoSeries, ws, err := ev.fetchInfoSeries(ctx, mat, ignoreSeries, dataLabelMatchers, selectHints) + if err != nil { + ev.error(err) + } + annots.Merge(ws) + + res, ws := ev.combineWithInfoSeries(ctx, mat, infoSeries, ignoreSeries, dataLabelMatchers) + annots.Merge(ws) + return res, annots +} + +// infoSelectHints calculates the storage.SelectHints for selecting info series, given expr (first argument to info call). +func (ev *evaluator) infoSelectHints(expr parser.Expr) storage.SelectHints { + var nodeTimestamp *int64 + var offset int64 + parser.Inspect(expr, func(node parser.Node, path []parser.Node) error { + switch n := node.(type) { + case *parser.VectorSelector: + if n.Timestamp != nil { + nodeTimestamp = n.Timestamp + } + offset = durationMilliseconds(n.OriginalOffset) + return errors.New("end traversal") + default: + return nil + } + }) + + start := ev.startTimestamp + end := ev.endTimestamp + if nodeTimestamp != nil { + // The timestamp on the selector overrides everything. + start = *nodeTimestamp + end = *nodeTimestamp + } + // Reduce the start by one fewer ms than the lookback delta + // because wo want to exclude samples that are precisely the + // lookback delta before the eval time. + start -= durationMilliseconds(ev.lookbackDelta) - 1 + start -= offset + end -= offset + + return storage.SelectHints{ + Start: start, + End: end, + Step: ev.interval, + Func: "info", + } +} + +// fetchInfoSeries fetches info series given matching identifying labels in mat. +// Series in ignoreSeries are not fetched. +// dataLabelMatchers may be mutated. +func (ev *evaluator) fetchInfoSeries(ctx context.Context, mat Matrix, ignoreSeries map[int]struct{}, dataLabelMatchers map[string][]*labels.Matcher, selectHints storage.SelectHints) (Matrix, annotations.Annotations, error) { + // A map of values for all identifying labels we are interested in. + idLblValues := map[string]map[string]struct{}{} + for i, s := range mat { + if _, exists := ignoreSeries[i]; exists { + continue + } + + // Register relevant values per identifying label for this series. + for _, l := range identifyingLabels { + val := s.Metric.Get(l) + if val == "" { + continue + } + + if idLblValues[l] == nil { + idLblValues[l] = map[string]struct{}{} + } + idLblValues[l][val] = struct{}{} + } + } + if len(idLblValues) == 0 { + return nil, nil, nil + } + + // Generate regexps for every interesting value per identifying label. + var sb strings.Builder + idLblRegexps := make(map[string]string, len(idLblValues)) + for name, vals := range idLblValues { + sb.Reset() + i := 0 + for v := range vals { + if i > 0 { + sb.WriteRune('|') + } + sb.WriteString(regexp.QuoteMeta(v)) + i++ + } + idLblRegexps[name] = sb.String() + } + + var infoLabelMatchers []*labels.Matcher + for name, re := range idLblRegexps { + infoLabelMatchers = append(infoLabelMatchers, labels.MustNewMatcher(labels.MatchRegexp, name, re)) + } + var nameMatcher *labels.Matcher + for name, ms := range dataLabelMatchers { + for i, m := range ms { + if m.Name == labels.MetricName { + nameMatcher = m + ms = slices.Delete(ms, i, i+1) + } + infoLabelMatchers = append(infoLabelMatchers, m) + } + if len(ms) > 0 { + dataLabelMatchers[name] = ms + } else { + delete(dataLabelMatchers, name) + } + } + if nameMatcher == nil { + // Default to using the target_info metric. + infoLabelMatchers = append([]*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, targetInfo)}, infoLabelMatchers...) + } + + infoIt := ev.querier.Select(ctx, false, &selectHints, infoLabelMatchers...) + infoSeries, ws, err := expandSeriesSet(ctx, infoIt) + if err != nil { + return nil, ws, err + } + + infoMat := ev.evalSeries(ctx, infoSeries, 0, true) + return infoMat, ws, nil +} + +// combineWithInfoSeries combines mat with select data labels from infoMat. +func (ev *evaluator) combineWithInfoSeries(ctx context.Context, mat, infoMat Matrix, ignoreSeries map[int]struct{}, dataLabelMatchers map[string][]*labels.Matcher) (Matrix, annotations.Annotations) { + buf := make([]byte, 0, 1024) + lb := labels.NewScratchBuilder(0) + sigFunction := func(name string) func(labels.Labels) string { + return func(lset labels.Labels) string { + lb.Reset() + lb.Add(labels.MetricName, name) + lset.MatchLabels(true, identifyingLabels...).Range(func(l labels.Label) { + lb.Add(l.Name, l.Value) + }) + lb.Sort() + return string(lb.Labels().Bytes(buf)) + } + } + + infoMetrics := map[string]struct{}{} + for _, is := range infoMat { + lblMap := is.Metric.Map() + infoMetrics[lblMap[labels.MetricName]] = struct{}{} + } + sigfs := make(map[string]func(labels.Labels) string, len(infoMetrics)) + for name := range infoMetrics { + sigfs[name] = sigFunction(name) + } + + // Keep a copy of the original point slices so they can be returned to the pool. + origMatrices := []Matrix{ + make(Matrix, len(mat)), + make(Matrix, len(infoMat)), + } + copy(origMatrices[0], mat) + copy(origMatrices[1], infoMat) + + numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 + originalNumSamples := ev.currentSamples + + // Create an output vector that is as big as the input matrix with + // the most time series. + biggestLen := max(len(mat), len(infoMat)) + baseVector := make(Vector, 0, len(mat)) + infoVector := make(Vector, 0, len(infoMat)) + enh := &EvalNodeHelper{ + Out: make(Vector, 0, biggestLen), + } + type seriesAndTimestamp struct { + Series + ts int64 + } + seriess := make(map[uint64]seriesAndTimestamp, biggestLen) // Output series by series hash. + tempNumSamples := ev.currentSamples + + // For every base series, compute signature per info metric. + baseSigs := make([]map[string]string, 0, len(mat)) + for _, s := range mat { + sigs := make(map[string]string, len(infoMetrics)) + for infoName := range infoMetrics { + sigs[infoName] = sigfs[infoName](s.Metric) + } + baseSigs = append(baseSigs, sigs) + } + + infoSigs := make([]string, 0, len(infoMat)) + for _, s := range infoMat { + name := s.Metric.Map()[labels.MetricName] + infoSigs = append(infoSigs, sigfs[name](s.Metric)) + } + + var warnings annotations.Annotations + for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval { + if err := contextDone(ctx, "expression evaluation"); err != nil { + ev.error(err) + } + + // Reset number of samples in memory after each timestamp. + ev.currentSamples = tempNumSamples + // Gather input vectors for this timestamp. + baseVector, _ = ev.gatherVector(ts, mat, baseVector, nil, nil) + infoVector, _ = ev.gatherVector(ts, infoMat, infoVector, nil, nil) + + enh.Ts = ts + result, err := ev.combineWithInfoVector(baseVector, infoVector, ignoreSeries, baseSigs, infoSigs, enh, dataLabelMatchers) + if err != nil { + ev.error(err) + } + enh.Out = result[:0] // Reuse result vector. + + vecNumSamples := result.TotalSamples() + ev.currentSamples += vecNumSamples + // When we reset currentSamples to tempNumSamples during the next iteration of the loop it also + // needs to include the samples from the result here, as they're still in memory. + tempNumSamples += vecNumSamples + ev.samplesStats.UpdatePeak(ev.currentSamples) + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + + // Add samples in result vector to output series. + for _, sample := range result { + h := sample.Metric.Hash() + ss, exists := seriess[h] + if exists { + if ss.ts == ts { // If we've seen this output series before at this timestamp, it's a duplicate. + ev.errorf("vector cannot contain metrics with the same labelset") + } + ss.ts = ts + } else { + ss = seriesAndTimestamp{Series{Metric: sample.Metric}, ts} + } + addToSeries(&ss.Series, enh.Ts, sample.F, sample.H, numSteps) + seriess[h] = ss + } + } + + // Reuse the original point slices. + for _, m := range origMatrices { + for _, s := range m { + putFPointSlice(s.Floats) + putHPointSlice(s.Histograms) + } + } + // Assemble the output matrix. By the time we get here we know we don't have too many samples. + numSamples := 0 + output := make(Matrix, 0, len(seriess)) + for _, ss := range seriess { + numSamples += len(ss.Floats) + totalHPointSize(ss.Histograms) + output = append(output, ss.Series) + } + ev.currentSamples = originalNumSamples + numSamples + ev.samplesStats.UpdatePeak(ev.currentSamples) + return output, warnings +} + +// combineWithInfoVector combines base and info Vectors. +// Base series in ignoreSeries are not combined. +func (ev *evaluator) combineWithInfoVector(base, info Vector, ignoreSeries map[int]struct{}, baseSigs []map[string]string, infoSigs []string, enh *EvalNodeHelper, dataLabelMatchers map[string][]*labels.Matcher) (Vector, error) { + if len(base) == 0 { + return nil, nil // Short-circuit: nothing is going to match. + } + + // All samples from the info Vector hashed by the matching label/values. + if enh.rightSigs == nil { + enh.rightSigs = make(map[string]Sample, len(enh.Out)) + } else { + clear(enh.rightSigs) + } + + for i, s := range info { + if s.H != nil { + ev.error(errors.New("info sample should be float")) + } + // We encode original info sample timestamps via the float value. + origT := int64(s.F) + + sig := infoSigs[i] + if existing, exists := enh.rightSigs[sig]; exists { + // We encode original info sample timestamps via the float value. + existingOrigT := int64(existing.F) + switch { + case existingOrigT > origT: + // Keep the other info sample, since it's newer. + case existingOrigT < origT: + // Keep this info sample, since it's newer. + enh.rightSigs[sig] = s + default: + // The two info samples have the same timestamp - conflict. + name := s.Metric.Map()[labels.MetricName] + ev.errorf("found duplicate series for info metric %s", name) + } + } else { + enh.rightSigs[sig] = s + } + } + + for i, bs := range base { + if _, exists := ignoreSeries[i]; exists { + // This series should not be enriched with info metric data labels. + enh.Out = append(enh.Out, Sample{ + Metric: bs.Metric, + F: bs.F, + H: bs.H, + }) + continue + } + + baseLabels := bs.Metric.Map() + enh.resetBuilder(labels.Labels{}) + + // For every info metric name, try to find an info series with the same signature. + seenInfoMetrics := map[string]struct{}{} + for infoName, sig := range baseSigs[i] { + is, exists := enh.rightSigs[sig] + if !exists { + continue + } + if _, exists := seenInfoMetrics[infoName]; exists { + continue + } + + err := is.Metric.Validate(func(l labels.Label) error { + if l.Name == labels.MetricName { + return nil + } + if _, exists := dataLabelMatchers[l.Name]; len(dataLabelMatchers) > 0 && !exists { + // Not among the specified data label matchers. + return nil + } + + if v := enh.lb.Get(l.Name); v != "" && v != l.Value { + return fmt.Errorf("conflicting label: %s", l.Name) + } + if _, exists := baseLabels[l.Name]; exists { + // Skip labels already on the base metric. + return nil + } + + enh.lb.Set(l.Name, l.Value) + return nil + }) + if err != nil { + return nil, err + } + seenInfoMetrics[infoName] = struct{}{} + } + + infoLbls := enh.lb.Labels() + if infoLbls.Len() == 0 { + // If there's at least one data label matcher not matching the empty string, + // we have to ignore this series as there are no matching info series. + allMatchersMatchEmpty := true + for _, ms := range dataLabelMatchers { + for _, m := range ms { + if !m.Matches("") { + allMatchersMatchEmpty = false + break + } + } + } + if !allMatchersMatchEmpty { + continue + } + } + + enh.resetBuilder(bs.Metric) + infoLbls.Range(func(l labels.Label) { + enh.lb.Set(l.Name, l.Value) + }) + + enh.Out = append(enh.Out, Sample{ + Metric: enh.lb.Labels(), + F: bs.F, + H: bs.H, + }) + } + return enh.Out, nil +} diff --git a/promql/info_test.go b/promql/info_test.go new file mode 100644 index 000000000..2e7a67172 --- /dev/null +++ b/promql/info_test.go @@ -0,0 +1,140 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql_test + +import ( + "testing" + + "github.com/prometheus/prometheus/promql/promqltest" +) + +// The "info" function is experimental. This is why we write those tests here for now instead of promqltest/testdata/info.test. +func TestInfo(t *testing.T) { + engine := promqltest.NewTestEngine(t, false, 0, promqltest.DefaultMaxSamplesPerQuery) + promqltest.RunTest(t, ` +load 5m + metric{instance="a", job="1", label="value"} 0 1 2 + metric_not_matching_target_info{instance="a", job="2", label="value"} 0 1 2 + metric_with_overlapping_label{instance="a", job="1", label="value", data="base"} 0 1 2 + target_info{instance="a", job="1", data="info", another_data="another info"} 1 1 1 + build_info{instance="a", job="1", build_data="build"} 1 1 1 + +# Include one info metric data label. +eval range from 0m to 10m step 5m info(metric, {data=~".+"}) + metric{data="info", instance="a", job="1", label="value"} 0 1 2 + +# Include all info metric data labels. +eval range from 0m to 10m step 5m info(metric) + metric{data="info", instance="a", job="1", label="value", another_data="another info"} 0 1 2 + +# Try including all info metric data labels, but non-matching identifying labels. +eval range from 0m to 10m step 5m info(metric_not_matching_target_info) + metric_not_matching_target_info{instance="a", job="2", label="value"} 0 1 2 + +# Try including a certain info metric data label with a non-matching matcher not accepting empty labels. +# Metric is ignored, due there being a data label matcher not matching empty labels, +# and there being no info series matches. +eval range from 0m to 10m step 5m info(metric, {non_existent=~".+"}) + +# Include a certain info metric data label together with a non-matching matcher accepting empty labels. +# Since the non_existent matcher matches empty labels, it's simply ignored when there's no match. +# XXX: This case has to include a matcher not matching empty labels, due the PromQL limitation +# that vector selectors have to contain at least one matcher not accepting empty labels. +# We might need another construct than vector selector to get around this limitation. +eval range from 0m to 10m step 5m info(metric, {data=~".+", non_existent=~".*"}) + metric{data="info", instance="a", job="1", label="value"} 0 1 2 + +# Info series data labels overlapping with those of base series are ignored. +eval range from 0m to 10m step 5m info(metric_with_overlapping_label) + metric_with_overlapping_label{data="base", instance="a", job="1", label="value", another_data="another info"} 0 1 2 + +# Include data labels from target_info specifically. +eval range from 0m to 10m step 5m info(metric, {__name__="target_info"}) + metric{data="info", instance="a", job="1", label="value", another_data="another info"} 0 1 2 + +# Try to include all data labels from a non-existent info metric. +eval range from 0m to 10m step 5m info(metric, {__name__="non_existent"}) + metric{instance="a", job="1", label="value"} 0 1 2 + +# Try to include a certain data label from a non-existent info metric. +eval range from 0m to 10m step 5m info(metric, {__name__="non_existent", data=~".+"}) + +# Include data labels from build_info. +eval range from 0m to 10m step 5m info(metric, {__name__="build_info"}) + metric{instance="a", job="1", label="value", build_data="build"} 0 1 2 + +# Include data labels from build_info and target_info. +eval range from 0m to 10m step 5m info(metric, {__name__=~".+_info"}) + metric{instance="a", job="1", label="value", build_data="build", data="info", another_data="another info"} 0 1 2 + +# Info metrics themselves are ignored when it comes to enriching with info metric data labels. +eval range from 0m to 10m step 5m info(build_info, {__name__=~".+_info", build_data=~".+"}) + build_info{instance="a", job="1", build_data="build"} 1 1 1 + +clear + +# Overlapping target_info series. +load 5m + metric{instance="a", job="1", label="value"} 0 1 2 + target_info{instance="a", job="1", data="info", another_data="another info"} 1 1 _ + target_info{instance="a", job="1", data="updated info", another_data="another info"} _ _ 1 + +# Conflicting info series are resolved through picking the latest sample. +eval range from 0m to 10m step 5m info(metric) + metric{data="info", instance="a", job="1", label="value", another_data="another info"} 0 1 _ + metric{data="updated info", instance="a", job="1", label="value", another_data="another info"} _ _ 2 + +clear + +# Non-overlapping target_info series. +load 5m + metric{instance="a", job="1", label="value"} 0 1 2 + target_info{instance="a", job="1", data="info"} 1 1 stale + target_info{instance="a", job="1", data="updated info"} _ _ 1 + +# Include info metric data labels from a metric which data labels change over time. +eval range from 0m to 10m step 5m info(metric) + metric{data="info", instance="a", job="1", label="value"} 0 1 _ + metric{data="updated info", instance="a", job="1", label="value"} _ _ 2 + +clear + +# Info series selector matches histogram series, info metrics should be float type. +load 5m + metric{instance="a", job="1", label="value"} 0 1 2 + histogram{instance="a", job="1"} {{schema:1 sum:3 count:22 buckets:[5 10 7]}} + +eval_fail range from 0m to 10m step 5m info(metric, {__name__="histogram"}) + +clear + +# Series with skipped scrape. +load 1m + metric{instance="a", job="1", label="value"} 0 _ 2 3 4 + target_info{instance="a", job="1", data="info"} 1 _ 1 1 1 + +# Lookback works also for the info series. +eval range from 1m to 4m step 1m info(metric) + metric{data="info", instance="a", job="1", label="value"} 0 2 3 4 + +# @ operator works also with info. +# Note that we pick the timestamp missing a sample, lookback should pick previous sample. +eval range from 1m to 4m step 1m info(metric @ 60) + metric{data="info", instance="a", job="1", label="value"} 0 0 0 0 + +# offset operator works also with info. +eval range from 1m to 4m step 1m info(metric offset 1m) + metric{data="info", instance="a", job="1", label="value"} 0 0 2 3 +`, engine) +} diff --git a/promql/parser/ast.go b/promql/parser/ast.go index 162d7817a..132ef3f0d 100644 --- a/promql/parser/ast.go +++ b/promql/parser/ast.go @@ -208,6 +208,10 @@ type VectorSelector struct { UnexpandedSeriesSet storage.SeriesSet Series []storage.Series + // BypassEmptyMatcherCheck is true when the VectorSelector isn't required to have at least one matcher matching the empty string. + // This is the case when VectorSelector is used to represent the info function's second argument. + BypassEmptyMatcherCheck bool + PosRange posrange.PositionRange } diff --git a/promql/parser/functions.go b/promql/parser/functions.go index 9d7b56053..aa65aca27 100644 --- a/promql/parser/functions.go +++ b/promql/parser/functions.go @@ -224,6 +224,13 @@ var Functions = map[string]*Function{ ArgTypes: []ValueType{ValueTypeMatrix}, ReturnType: ValueTypeVector, }, + "info": { + Name: "info", + ArgTypes: []ValueType{ValueTypeVector, ValueTypeVector}, + ReturnType: ValueTypeVector, + Experimental: true, + Variadic: 1, + }, "irate": { Name: "irate", ArgTypes: []ValueType{ValueTypeMatrix}, diff --git a/promql/parser/parse.go b/promql/parser/parse.go index ae558dccc..05549eaac 100644 --- a/promql/parser/parse.go +++ b/promql/parser/parse.go @@ -784,6 +784,19 @@ func (p *parser) checkAST(node Node) (typ ValueType) { } } + if n.Func.Name == "info" && len(n.Args) > 1 { + // Check the type is correct first + if n.Args[1].Type() != ValueTypeVector { + p.addParseErrf(node.PositionRange(), "expected type %s in %s, got %s", DocumentedType(ValueTypeVector), fmt.Sprintf("call to function %q", n.Func.Name), DocumentedType(n.Args[1].Type())) + } + // Check the vector selector in the input doesn't contain a metric name + if n.Args[1].(*VectorSelector).Name != "" { + p.addParseErrf(n.Args[1].PositionRange(), "expected label selectors only, got vector selector instead") + } + // Set Vector Selector flag to bypass empty matcher check + n.Args[1].(*VectorSelector).BypassEmptyMatcherCheck = true + } + for i, arg := range n.Args { if i >= len(n.Func.ArgTypes) { if n.Func.Variadic == 0 { @@ -830,17 +843,19 @@ func (p *parser) checkAST(node Node) (typ ValueType) { // metric name is a non-empty matcher. break } - // A Vector selector must contain at least one non-empty matcher to prevent - // implicit selection of all metrics (e.g. by a typo). - notEmpty := false - for _, lm := range n.LabelMatchers { - if lm != nil && !lm.Matches("") { - notEmpty = true - break + if !n.BypassEmptyMatcherCheck { + // A Vector selector must contain at least one non-empty matcher to prevent + // implicit selection of all metrics (e.g. by a typo). + notEmpty := false + for _, lm := range n.LabelMatchers { + if lm != nil && !lm.Matches("") { + notEmpty = true + break + } + } + if !notEmpty { + p.addParseErrf(n.PositionRange(), "vector selector must contain at least one non-empty matcher") } - } - if !notEmpty { - p.addParseErrf(n.PositionRange(), "vector selector must contain at least one non-empty matcher") } case *NumberLiteral, *StringLiteral: diff --git a/promql/parser/parse_test.go b/promql/parser/parse_test.go index b5096b777..0e5e2f638 100644 --- a/promql/parser/parse_test.go +++ b/promql/parser/parse_test.go @@ -3872,6 +3872,81 @@ var testExpr = []struct { }, }, }, + { + input: `info(rate(http_request_counter_total{}[5m]))`, + expected: &Call{ + Func: MustGetFunction("info"), + Args: Expressions{ + &Call{ + Func: MustGetFunction("rate"), + PosRange: posrange.PositionRange{ + Start: 5, + End: 43, + }, + Args: Expressions{ + &MatrixSelector{ + VectorSelector: &VectorSelector{ + Name: "http_request_counter_total", + OriginalOffset: 0, + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "http_request_counter_total"), + }, + PosRange: posrange.PositionRange{ + Start: 10, + End: 38, + }, + }, + EndPos: 42, + Range: 5 * time.Minute, + }, + }, + }, + }, + PosRange: posrange.PositionRange{ + Start: 0, + End: 44, + }, + }, + }, + { + input: `info(rate(http_request_counter_total{}[5m]), target_info{foo="bar"})`, + fail: true, + errMsg: `1:46: parse error: expected label selectors only, got vector selector instead`, + }, + { + input: `info(http_request_counter_total{namespace="zzz"}, {foo="bar", bar="baz"})`, + expected: &Call{ + Func: MustGetFunction("info"), + Args: Expressions{ + &VectorSelector{ + Name: "http_request_counter_total", + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, "namespace", "zzz"), + MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "http_request_counter_total"), + }, + PosRange: posrange.PositionRange{ + Start: 5, + End: 48, + }, + }, + &VectorSelector{ + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, "foo", "bar"), + MustLabelMatcher(labels.MatchEqual, "bar", "baz"), + }, + PosRange: posrange.PositionRange{ + Start: 50, + End: 72, + }, + BypassEmptyMatcherCheck: true, + }, + }, + PosRange: posrange.PositionRange{ + Start: 0, + End: 73, + }, + }, + }, } func makeInt64Pointer(val int64) *int64 { @@ -3889,6 +3964,12 @@ func readable(s string) string { } func TestParseExpressions(t *testing.T) { + // Enable experimental functions testing. + EnableExperimentalFunctions = true + t.Cleanup(func() { + EnableExperimentalFunctions = false + }) + model.NameValidationScheme = model.UTF8Validation for _, test := range testExpr { t.Run(readable(test.input), func(t *testing.T) { diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index e078bcb60..4cbd39b40 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -482,18 +482,16 @@ func (cmd *loadCmd) append(a storage.Appender) error { type tempHistogramWrapper struct { metric labels.Labels - upperBounds []float64 histogramByTs map[int64]convertnhcb.TempHistogram } func newTempHistogramWrapper() tempHistogramWrapper { return tempHistogramWrapper{ - upperBounds: []float64{}, histogramByTs: map[int64]convertnhcb.TempHistogram{}, } } -func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogramWrapper func(*tempHistogramWrapper), updateHistogram func(*convertnhcb.TempHistogram, float64)) { +func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogram func(*convertnhcb.TempHistogram, float64)) { m2 := convertnhcb.GetHistogramMetricBase(m, suffix) m2hash := m2.Hash() histogramWrapper, exists := histogramMap[m2hash] @@ -501,9 +499,6 @@ func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap histogramWrapper = newTempHistogramWrapper() } histogramWrapper.metric = m2 - if updateHistogramWrapper != nil { - updateHistogramWrapper(&histogramWrapper) - } for _, s := range smpls { if s.H != nil { continue @@ -534,18 +529,16 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { if err != nil || math.IsNaN(le) { continue } - processClassicHistogramSeries(m, "_bucket", histogramMap, smpls, func(histogramWrapper *tempHistogramWrapper) { - histogramWrapper.upperBounds = append(histogramWrapper.upperBounds, le) - }, func(histogram *convertnhcb.TempHistogram, f float64) { - histogram.BucketCounts[le] = f + processClassicHistogramSeries(m, "_bucket", histogramMap, smpls, func(histogram *convertnhcb.TempHistogram, f float64) { + _ = histogram.SetBucketCount(le, f) }) case strings.HasSuffix(mName, "_count"): - processClassicHistogramSeries(m, "_count", histogramMap, smpls, nil, func(histogram *convertnhcb.TempHistogram, f float64) { - histogram.Count = f + processClassicHistogramSeries(m, "_count", histogramMap, smpls, func(histogram *convertnhcb.TempHistogram, f float64) { + _ = histogram.SetCount(f) }) case strings.HasSuffix(mName, "_sum"): - processClassicHistogramSeries(m, "_sum", histogramMap, smpls, nil, func(histogram *convertnhcb.TempHistogram, f float64) { - histogram.Sum = f + processClassicHistogramSeries(m, "_sum", histogramMap, smpls, func(histogram *convertnhcb.TempHistogram, f float64) { + _ = histogram.SetSum(f) }) } } @@ -553,11 +546,12 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { // Convert the collated classic histogram data into native histograms // with custom bounds and append them to the storage. for _, histogramWrapper := range histogramMap { - upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds, true) - fhBase := hBase.ToFloat(nil) samples := make([]promql.Sample, 0, len(histogramWrapper.histogramByTs)) for t, histogram := range histogramWrapper.histogramByTs { - h, fh := convertnhcb.NewHistogram(histogram, upperBounds, hBase, fhBase) + h, fh, err := histogram.Convert() + if err != nil { + return err + } if fh == nil { if err := h.Validate(); err != nil { return err @@ -672,7 +666,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { switch val := result.(type) { case promql.Matrix: if ev.ordered { - return fmt.Errorf("expected ordered result, but query returned a matrix") + return errors.New("expected ordered result, but query returned a matrix") } if ev.expectScalar { diff --git a/promql/promqltest/testdata/aggregators.test b/promql/promqltest/testdata/aggregators.test index 3c9188396..e2eb381db 100644 --- a/promql/promqltest/testdata/aggregators.test +++ b/promql/promqltest/testdata/aggregators.test @@ -572,3 +572,160 @@ clear # #eval instant at 1m count(topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without())) # {} 1 + +clear + +# Test stddev produces consistent results regardless the order the data is loaded in. +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}} + +eval instant at 0m stddev(series) + {} 0.5 + +eval instant at 0m stdvar(series) + {} 0.25 + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + +clear + +load 5m + series{label="a"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}} + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} 0.5 + +eval instant at 0m stdvar(series) + {} 0.25 + +eval instant at 0m stddev by (label) (series) + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} NaN + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +clear + +load 5m + series{label="a"} NaN + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series NaN + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +clear + +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} inf + +eval instant at 0m stddev (series) + {} NaN + +eval instant at 0m stdvar (series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +clear + +load 5m + series{label="a"} inf + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series inf + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN diff --git a/promql/promqltest/testdata/functions.test b/promql/promqltest/testdata/functions.test index 46a88cd94..23c56565f 100644 --- a/promql/promqltest/testdata/functions.test +++ b/promql/promqltest/testdata/functions.test @@ -1273,3 +1273,12 @@ load 1m # We expect the value to be 0 for t=0s to t=59s (inclusive), then 60 for t=60s and t=61s. eval range from 0 to 61s step 1s timestamp(metric) {} 0x59 60 60 + +clear + +# Check round with mixed data types +load 1m + mixed_metric {{schema:0 sum:5 count:4 buckets:[1 2 1]}} 1 2 3 {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:8 count:6 buckets:[1 4 1]}} + +eval range from 0 to 5m step 1m round(mixed_metric) + {} _ 1 2 3 diff --git a/promql/promqltest/testdata/name_label_dropping.test b/promql/promqltest/testdata/name_label_dropping.test index 5f5dcd5e4..d4a2ad257 100644 --- a/promql/promqltest/testdata/name_label_dropping.test +++ b/promql/promqltest/testdata/name_label_dropping.test @@ -31,6 +31,10 @@ eval instant at 10m metric * 2 eval instant at 10m clamp(metric, 0, 100) {env="1"} 100 +# Drops __name__ for round function +eval instant at 10m round(metric) + {env="1"} 120 + # Drops __name__ for range-vector functions eval instant at 10m rate(metric{env="1"}[10m]) {env="1"} 0.2 diff --git a/rules/alerting.go b/rules/alerting.go index 7e74c176a..e7f15baef 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -15,6 +15,7 @@ package rules import ( "context" + "errors" "fmt" "log/slog" "net/url" @@ -403,7 +404,7 @@ func (r *AlertingRule) Eval(ctx context.Context, queryOffset time.Duration, ts t resultFPs[h] = struct{}{} if _, ok := alerts[h]; ok { - return nil, fmt.Errorf("vector contains metrics with the same labelset after applying alert labels") + return nil, errors.New("vector contains metrics with the same labelset after applying alert labels") } alerts[h] = &Alert{ diff --git a/rules/fixtures/rules1.yaml b/rules/fixtures/rules1.yaml new file mode 100644 index 000000000..76fbf71f3 --- /dev/null +++ b/rules/fixtures/rules1.yaml @@ -0,0 +1,5 @@ +groups: + - name: test_1 + rules: + - record: test_2 + expr: vector(2) diff --git a/rules/group.go b/rules/group.go index e9ef2be3a..7dd046b57 100644 --- a/rules/group.go +++ b/rules/group.go @@ -75,6 +75,7 @@ type Group struct { // concurrencyController controls the rules evaluation concurrency. concurrencyController RuleConcurrencyController + appOpts *storage.AppendOptions } // GroupEvalIterationFunc is used to implement and extend rule group @@ -145,6 +146,7 @@ func NewGroup(o GroupOptions) *Group { metrics: metrics, evalIterationFunc: evalIterationFunc, concurrencyController: concurrencyController, + appOpts: &storage.AppendOptions{DiscardOutOfOrder: true}, } } @@ -564,6 +566,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { if s.H != nil { _, err = app.AppendHistogram(0, s.Metric, s.T, nil, s.H) } else { + app.SetOptions(g.appOpts) _, err = app.Append(0, s.Metric, s.T, s.F) } @@ -660,6 +663,7 @@ func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) { return } app := g.opts.Appendable.Appender(ctx) + app.SetOptions(g.appOpts) queryOffset := g.QueryOffset() for _, s := range g.staleSeries { // Rule that produced series no longer configured, mark it stale. diff --git a/rules/manager.go b/rules/manager.go index 2e89184bd..6e9bf6469 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -316,13 +316,15 @@ func (m *Manager) LoadGroups( return nil, []error{fmt.Errorf("%s: %w", fn, err)} } + mLabels := FromMaps(rg.Labels, r.Labels) + if r.Alert.Value != "" { rules = append(rules, NewAlertingRule( r.Alert.Value, expr, time.Duration(r.For), time.Duration(r.KeepFiringFor), - labels.FromMap(r.Labels), + mLabels, labels.FromMap(r.Annotations), externalLabels, externalURL, @@ -334,7 +336,7 @@ func (m *Manager) LoadGroups( rules = append(rules, NewRecordingRule( r.Record.Value, expr, - labels.FromMap(r.Labels), + mLabels, )) } @@ -505,3 +507,16 @@ func (c sequentialRuleEvalController) Allow(_ context.Context, _ *Group, _ Rule) } func (c sequentialRuleEvalController) Done(_ context.Context) {} + +// FromMaps returns new sorted Labels from the given maps, overriding each other in order. +func FromMaps(maps ...map[string]string) labels.Labels { + mLables := make(map[string]string) + + for _, m := range maps { + for k, v := range m { + mLables[k] = v + } + } + + return labels.FromMap(mLables) +} diff --git a/rules/manager_test.go b/rules/manager_test.go index ffb15952b..6afac993d 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -855,10 +855,11 @@ type ruleGroupsTest struct { // ruleGroupTest forms a testing struct for running tests over rules. type ruleGroupTest struct { - Name string `yaml:"name"` - Interval model.Duration `yaml:"interval,omitempty"` - Limit int `yaml:"limit,omitempty"` - Rules []rulefmt.Rule `yaml:"rules"` + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []rulefmt.Rule `yaml:"rules"` + Labels map[string]string `yaml:"labels,omitempty"` } func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest { @@ -881,6 +882,7 @@ func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest { Interval: g.Interval, Limit: g.Limit, Rules: rtmp, + Labels: g.Labels, }) } return ruleGroupsTest{ @@ -1193,6 +1195,53 @@ func countStaleNaN(t *testing.T, st storage.Storage) int { return c } +func TestRuleMovedBetweenGroups(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + storage := teststorage.New(t, 600000) + defer storage.Close() + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 10, + Timeout: 10 * time.Second, + } + engine := promql.NewEngine(opts) + ruleManager := NewManager(&ManagerOptions{ + Appendable: storage, + Queryable: storage, + QueryFunc: EngineQueryFunc(engine, storage), + Context: context.Background(), + Logger: promslog.NewNopLogger(), + }) + var stopped bool + ruleManager.start() + defer func() { + if !stopped { + ruleManager.Stop() + } + }() + + rule2 := "fixtures/rules2.yaml" + rule1 := "fixtures/rules1.yaml" + + // Load initial configuration of rules2 + require.NoError(t, ruleManager.Update(1*time.Second, []string{rule2}, labels.EmptyLabels(), "", nil)) + + // Wait for rule to be evaluated + time.Sleep(3 * time.Second) + + // Reload configuration of rules1 + require.NoError(t, ruleManager.Update(1*time.Second, []string{rule1}, labels.EmptyLabels(), "", nil)) + + // Wait for rule to be evaluated in new location and potential staleness marker + time.Sleep(3 * time.Second) + + require.Equal(t, 0, countStaleNaN(t, storage)) // Not expecting any stale markers. +} + func TestGroupHasAlertingRules(t *testing.T) { tests := []struct { group *Group @@ -2158,3 +2207,18 @@ func optsFactory(storage storage.Storage, maxInflight, inflightQueries *atomic.I }, } } + +func TestLabels_FromMaps(t *testing.T) { + mLabels := FromMaps( + map[string]string{"aaa": "101", "bbb": "222"}, + map[string]string{"aaa": "111", "ccc": "333"}, + ) + + expected := labels.New( + labels.Label{Name: "aaa", Value: "111"}, + labels.Label{Name: "bbb", Value: "222"}, + labels.Label{Name: "ccc", Value: "333"}, + ) + + require.Equal(t, expected, mLabels, "unexpected labelset") +} diff --git a/rules/recording.go b/rules/recording.go index 17a75fdd1..52c2a875a 100644 --- a/rules/recording.go +++ b/rules/recording.go @@ -15,6 +15,7 @@ package rules import ( "context" + "errors" "fmt" "net/url" "time" @@ -103,7 +104,7 @@ func (rule *RecordingRule) Eval(ctx context.Context, queryOffset time.Duration, // Check that the rule does not produce identical metrics after applying // labels. if vector.ContainsSameLabelset() { - return nil, fmt.Errorf("vector contains metrics with the same labelset after applying rule labels") + return nil, errors.New("vector contains metrics with the same labelset after applying rule labels") } numSeries := len(vector) diff --git a/scrape/helpers_test.go b/scrape/helpers_test.go index 4f7918f79..12a56d707 100644 --- a/scrape/helpers_test.go +++ b/scrape/helpers_test.go @@ -43,6 +43,8 @@ func (a nopAppendable) Appender(_ context.Context) storage.Appender { type nopAppender struct{} +func (a nopAppender) SetOptions(opts *storage.AppendOptions) {} + func (a nopAppender) Append(storage.SeriesRef, labels.Labels, int64, float64) (storage.SeriesRef, error) { return 0, nil } @@ -114,6 +116,8 @@ type collectResultAppender struct { pendingMetadata []metadata.Metadata } +func (a *collectResultAppender) SetOptions(opts *storage.AppendOptions) {} + func (a *collectResultAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { a.mtx.Lock() defer a.mtx.Unlock() diff --git a/scrape/manager.go b/scrape/manager.go index 8d22bd9d1..04da3162e 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -94,8 +94,6 @@ type Options struct { skipOffsetting bool } -const DefaultNameEscapingScheme = model.ValueEncodingEscaping - // Manager maintains a set of scrape pools and manages start/stop cycles // when receiving new target groups from the discovery manager. type Manager struct { @@ -178,6 +176,11 @@ func (m *Manager) reload() { m.logger.Error("error reloading target set", "err", "invalid config id:"+setName) continue } + if scrapeConfig.ConvertClassicHistogramsToNHCB && m.opts.EnableCreatedTimestampZeroIngestion { + // TODO(krajorama): fix https://github.com/prometheus/prometheus/issues/15137 + m.logger.Error("error reloading target set", "err", "cannot convert classic histograms to native histograms with custom buckets and ingest created timestamp zero samples at the same time due to https://github.com/prometheus/prometheus/issues/15137") + continue + } m.metrics.targetScrapePools.Inc() sp, err := newScrapePool(scrapeConfig, m.append, m.offsetSeed, m.logger.With("scrape_pool", setName), m.buffers, m.opts, m.metrics) if err != nil { diff --git a/scrape/manager_test.go b/scrape/manager_test.go index c3544f634..f446c9978 100644 --- a/scrape/manager_test.go +++ b/scrape/manager_test.go @@ -16,6 +16,7 @@ package scrape import ( "bytes" "context" + "errors" "fmt" "net/http" "net/http/httptest" @@ -898,7 +899,7 @@ func doOneScrape(t *testing.T, manager *Manager, appender *collectResultAppender if len(appender.resultFloats) > 0 { return nil } - return fmt.Errorf("expected some float samples, got none") + return errors.New("expected some float samples, got none") }), "after 1 minute") manager.Stop() } @@ -1061,7 +1062,7 @@ func TestManagerCTZeroIngestionHistogram(t *testing.T) { if len(app.resultHistograms) > 0 { return nil } - return fmt.Errorf("expected some histogram samples, got none") + return errors.New("expected some histogram samples, got none") }), "after 1 minute") scrapeManager.Stop() diff --git a/scrape/scrape.go b/scrape/scrape.go index d40e0be2e..c5e344982 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -112,8 +112,10 @@ type scrapeLoopOptions struct { trackTimestampsStaleness bool interval time.Duration timeout time.Duration - scrapeClassicHistograms bool + alwaysScrapeClassicHist bool + convertClassicHistToNHCB bool validationScheme model.ValidationScheme + fallbackScrapeProtocol string mrc []*relabel.Config cache *scrapeCache @@ -179,7 +181,8 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed opts.labelLimits, opts.interval, opts.timeout, - opts.scrapeClassicHistograms, + opts.alwaysScrapeClassicHist, + opts.convertClassicHistToNHCB, options.EnableNativeHistogramsIngestion, options.EnableCreatedTimestampZeroIngestion, options.ExtraMetrics, @@ -189,6 +192,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed metrics, options.skipOffsetting, opts.validationScheme, + opts.fallbackScrapeProtocol, ) } sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit)) @@ -325,6 +329,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { enableCompression = sp.config.EnableCompression trackTimestampsStaleness = sp.config.TrackTimestampsStaleness mrc = sp.config.MetricRelabelConfigs + fallbackScrapeProtocol = sp.config.ScrapeFallbackProtocol.HeaderMediaType() ) validationScheme := model.UTF8Validation @@ -371,6 +376,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { interval: interval, timeout: timeout, validationScheme: validationScheme, + fallbackScrapeProtocol: fallbackScrapeProtocol, }) ) if err != nil { @@ -480,7 +486,9 @@ func (sp *scrapePool) sync(targets []*Target) { enableCompression = sp.config.EnableCompression trackTimestampsStaleness = sp.config.TrackTimestampsStaleness mrc = sp.config.MetricRelabelConfigs - scrapeClassicHistograms = sp.config.ScrapeClassicHistograms + fallbackScrapeProtocol = sp.config.ScrapeFallbackProtocol.HeaderMediaType() + alwaysScrapeClassicHist = sp.config.AlwaysScrapeClassicHistograms + convertClassicHistToNHCB = sp.config.ConvertClassicHistogramsToNHCB ) validationScheme := model.UTF8Validation @@ -521,8 +529,10 @@ func (sp *scrapePool) sync(targets []*Target) { mrc: mrc, interval: interval, timeout: timeout, - scrapeClassicHistograms: scrapeClassicHistograms, + alwaysScrapeClassicHist: alwaysScrapeClassicHist, + convertClassicHistToNHCB: convertClassicHistToNHCB, validationScheme: validationScheme, + fallbackScrapeProtocol: fallbackScrapeProtocol, }) if err != nil { l.setForcedError(err) @@ -883,8 +893,10 @@ type scrapeLoop struct { labelLimits *labelLimits interval time.Duration timeout time.Duration - scrapeClassicHistograms bool + alwaysScrapeClassicHist bool + convertClassicHistToNHCB bool validationScheme model.ValidationScheme + fallbackScrapeProtocol string // Feature flagged options. enableNativeHistogramIngestion bool @@ -1183,7 +1195,8 @@ func newScrapeLoop(ctx context.Context, labelLimits *labelLimits, interval time.Duration, timeout time.Duration, - scrapeClassicHistograms bool, + alwaysScrapeClassicHist bool, + convertClassicHistToNHCB bool, enableNativeHistogramIngestion bool, enableCTZeroIngestion bool, reportExtraMetrics bool, @@ -1193,6 +1206,7 @@ func newScrapeLoop(ctx context.Context, metrics *scrapeMetrics, skipOffsetting bool, validationScheme model.ValidationScheme, + fallbackScrapeProtocol string, ) *scrapeLoop { if l == nil { l = promslog.NewNopLogger() @@ -1237,7 +1251,8 @@ func newScrapeLoop(ctx context.Context, labelLimits: labelLimits, interval: interval, timeout: timeout, - scrapeClassicHistograms: scrapeClassicHistograms, + alwaysScrapeClassicHist: alwaysScrapeClassicHist, + convertClassicHistToNHCB: convertClassicHistToNHCB, enableNativeHistogramIngestion: enableNativeHistogramIngestion, enableCTZeroIngestion: enableCTZeroIngestion, reportExtraMetrics: reportExtraMetrics, @@ -1245,6 +1260,7 @@ func newScrapeLoop(ctx context.Context, metrics: metrics, skipOffsetting: skipOffsetting, validationScheme: validationScheme, + fallbackScrapeProtocol: fallbackScrapeProtocol, } sl.ctx, sl.cancel = context.WithCancel(ctx) @@ -1405,7 +1421,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er sl.l.Debug("Scrape failed", "err", scrapeErr) sl.scrapeFailureLoggerMtx.RLock() if sl.scrapeFailureLogger != nil { - sl.scrapeFailureLogger.Error("err", scrapeErr) + sl.scrapeFailureLogger.Error(scrapeErr.Error()) } sl.scrapeFailureLoggerMtx.RUnlock() if errc != nil { @@ -1537,11 +1553,24 @@ type appendErrors struct { } func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, ts time.Time) (total, added, seriesAdded int, err error) { - p, err := textparse.New(b, contentType, sl.scrapeClassicHistograms, sl.enableCTZeroIngestion, sl.symbolTable) + p, err := textparse.New(b, contentType, sl.fallbackScrapeProtocol, sl.alwaysScrapeClassicHist, sl.enableCTZeroIngestion, sl.symbolTable) + if p == nil { + sl.l.Error( + "Failed to determine correct type of scrape target.", + "content_type", contentType, + "fallback_media_type", sl.fallbackScrapeProtocol, + "err", err, + ) + return + } + if sl.convertClassicHistToNHCB { + p = textparse.NewNHCBParser(p, sl.symbolTable, sl.alwaysScrapeClassicHist) + } if err != nil { sl.l.Debug( - "Invalid content type on scrape, using prometheus parser as fallback.", + "Invalid content type on scrape, using fallback setting.", "content_type", contentType, + "fallback_media_type", sl.fallbackScrapeProtocol, "err", err, ) } @@ -1835,7 +1864,9 @@ loop: if err == nil { sl.cache.forEachStale(func(lset labels.Labels) bool { // Series no longer exposed, mark it stale. + app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true}) _, err = app.Append(0, lset, defTime, math.Float64frombits(value.StaleNaN)) + app.SetOptions(nil) switch { case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrDuplicateSampleForTimestamp): // Do not count these in logging, as this is expected if a target @@ -1941,7 +1972,7 @@ func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration tim func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err error) { ts := timestamp.FromTime(start) - + app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true}) stale := math.Float64frombits(value.StaleNaN) b := labels.NewBuilder(labels.EmptyLabels()) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index bdf85a451..02a31b762 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -29,6 +29,7 @@ import ( "strings" "sync" "testing" + "text/template" "time" "github.com/gogo/protobuf/proto" @@ -85,6 +86,97 @@ func TestNewScrapePool(t *testing.T) { require.NotNil(t, sp.newLoop, "newLoop function not initialized.") } +func TestStorageHandlesOutOfOrderTimestamps(t *testing.T) { + // Test with default OutOfOrderTimeWindow (0) + t.Run("Out-Of-Order Sample Disabled", func(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + runScrapeLoopTest(t, s, false) + }) + + // Test with specific OutOfOrderTimeWindow (600000) + t.Run("Out-Of-Order Sample Enabled", func(t *testing.T) { + s := teststorage.New(t, 600000) + defer s.Close() + + runScrapeLoopTest(t, s, true) + }) +} + +func runScrapeLoopTest(t *testing.T, s *teststorage.TestStorage, expectOutOfOrder bool) { + // Create an appender for adding samples to the storage. + app := s.Appender(context.Background()) + capp := &collectResultAppender{next: app} + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return capp }, 0) + + // Current time for generating timestamps. + now := time.Now() + + // Calculate timestamps for the samples based on the current time. + now = now.Truncate(time.Minute) // round down the now timestamp to the nearest minute + timestampInorder1 := now + timestampOutOfOrder := now.Add(-5 * time.Minute) + timestampInorder2 := now.Add(5 * time.Minute) + + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1`), "", timestampInorder1) + require.NoError(t, err) + + _, _, _, err = sl.append(slApp, []byte(`metric_a{a="1",b="1"} 2`), "", timestampOutOfOrder) + require.NoError(t, err) + + _, _, _, err = sl.append(slApp, []byte(`metric_a{a="1",b="1"} 3`), "", timestampInorder2) + require.NoError(t, err) + + require.NoError(t, slApp.Commit()) + + // Query the samples back from the storage. + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := s.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + + // Use a matcher to filter the metric name. + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "metric_a")) + + var results []floatSample + for series.Next() { + it := series.At().Iterator(nil) + for it.Next() == chunkenc.ValFloat { + t, v := it.At() + results = append(results, floatSample{ + metric: series.At().Labels(), + t: t, + f: v, + }) + } + require.NoError(t, it.Err()) + } + require.NoError(t, series.Err()) + + // Define the expected results + want := []floatSample{ + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(timestampInorder1), + f: 1, + }, + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(timestampInorder2), + f: 3, + }, + } + + if expectOutOfOrder { + require.NotEqual(t, want, results, "Expected results to include out-of-order sample:\n%s", results) + } else { + require.Equal(t, want, results, "Appended samples not as expected:\n%s", results) + } +} + func TestDroppedTargetsList(t *testing.T) { var ( app = &nopAppendable{} @@ -685,11 +777,13 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app false, false, false, + false, nil, false, newTestScrapeMetrics(t), false, model.LegacyValidation, + "text/plain", ) } @@ -828,11 +922,13 @@ func TestScrapeLoopRun(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, false, model.LegacyValidation, + "text/plain", ) // The loop must terminate during the initial offset if the context @@ -914,7 +1010,7 @@ func TestScrapeLoopForcedErr(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) sl := newBasicScrapeLoop(t, ctx, scraper, app, time.Second) - forcedErr := fmt.Errorf("forced err") + forcedErr := errors.New("forced err") sl.setForcedError(forcedErr) scraper.scrapeFunc = func(context.Context, io.Writer) error { @@ -973,11 +1069,13 @@ func TestScrapeLoopMetadata(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, false, model.LegacyValidation, + "text/plain", ) defer cancel() @@ -1150,6 +1248,87 @@ func BenchmarkScrapeLoopAppendOM(b *testing.B) { } } +func TestSetOptionsHandlingStaleness(t *testing.T) { + s := teststorage.New(t, 600000) + defer s.Close() + + signal := make(chan struct{}, 1) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Function to run the scrape loop + runScrapeLoop := func(ctx context.Context, t *testing.T, cue int, action func(*scrapeLoop)) { + var ( + scraper = &testScraper{} + app = func(ctx context.Context) storage.Appender { + return s.Appender(ctx) + } + ) + sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + numScrapes := 0 + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + numScrapes++ + if numScrapes == cue { + action(sl) + } + w.Write([]byte(fmt.Sprintf("metric_a{a=\"1\",b=\"1\"} %d\n", 42+numScrapes))) + return nil + } + sl.run(nil) + } + go func() { + runScrapeLoop(ctx, t, 2, func(sl *scrapeLoop) { + go sl.stop() + // Wait a bit then start a new target. + time.Sleep(100 * time.Millisecond) + go func() { + runScrapeLoop(ctx, t, 4, func(_ *scrapeLoop) { + cancel() + }) + signal <- struct{}{} + }() + }) + }() + + select { + case <-signal: + case <-time.After(10 * time.Second): + t.Fatalf("Scrape wasn't stopped.") + } + + ctx1, cancel := context.WithCancel(context.Background()) + defer cancel() + + q, err := s.Querier(0, time.Now().UnixNano()) + + require.NoError(t, err) + defer q.Close() + + series := q.Select(ctx1, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "metric_a")) + + var results []floatSample + for series.Next() { + it := series.At().Iterator(nil) + for it.Next() == chunkenc.ValFloat { + t, v := it.At() + results = append(results, floatSample{ + metric: series.At().Labels(), + t: t, + f: v, + }) + } + require.NoError(t, it.Err()) + } + require.NoError(t, series.Err()) + var c int + for _, s := range results { + if value.IsStaleNaN(s.f) { + c++ + } + } + require.Equal(t, 0, c, "invalid count of staleness markers after stopping the engine") +} + func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { appender := &collectResultAppender{} var ( @@ -1285,7 +1464,7 @@ func TestScrapeLoopCache(t *testing.T) { case 4: cancel() } - return fmt.Errorf("scrape failed") + return errors.New("scrape failed") } go func() { @@ -1526,7 +1705,8 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { fakeRef := storage.SeriesRef(1) expValue := float64(1) metric := []byte(`metric{n="1"} 1`) - p, warning := textparse.New(metric, "", false, false, labels.NewSymbolTable()) + p, warning := textparse.New(metric, "text/plain", "", false, false, labels.NewSymbolTable()) + require.NotNil(t, p) require.NoError(t, warning) var lset labels.Labels @@ -1846,7 +2026,7 @@ func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { func TestScrapeLoopAppendExemplar(t *testing.T) { tests := []struct { title string - scrapeClassicHistograms bool + alwaysScrapeClassicHist bool enableNativeHistogramsIngestion bool scrapeText string contentType string @@ -2115,7 +2295,7 @@ metric: < > `, - scrapeClassicHistograms: true, + alwaysScrapeClassicHist: true, contentType: "application/vnd.google.protobuf", floats: []floatSample{ {metric: labels.FromStrings("__name__", "test_histogram_count"), t: 1234568, f: 175}, @@ -2177,7 +2357,7 @@ metric: < sl.reportSampleMutator = func(l labels.Labels) labels.Labels { return mutateReportSampleLabels(l, discoveryLabels) } - sl.scrapeClassicHistograms = test.scrapeClassicHistograms + sl.alwaysScrapeClassicHist = test.alwaysScrapeClassicHist now := time.Now() @@ -3084,7 +3264,7 @@ func TestScrapeReportSingleAppender(t *testing.T) { scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { numScrapes++ if numScrapes%4 == 0 { - return fmt.Errorf("scrape failed") + return errors.New("scrape failed") } w.Write([]byte("metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n")) return nil @@ -3469,6 +3649,524 @@ test_summary_count 199 checkValues("quantile", expectedQuantileValues, series) } +// Testing whether we can automatically convert scraped classic histograms into native histograms with custom buckets. +func TestConvertClassicHistogramsToNHCB(t *testing.T) { + genTestCounterText := func(name string, value int, withMetadata bool) string { + if withMetadata { + return fmt.Sprintf(` +# HELP %s some help text +# TYPE %s counter +%s{address="0.0.0.0",port="5001"} %d +`, name, name, name, value) + } + return fmt.Sprintf(` +%s %d +`, name, value) + } + genTestHistText := func(name string, withMetadata bool) string { + data := map[string]interface{}{ + "name": name, + } + b := &bytes.Buffer{} + if withMetadata { + template.Must(template.New("").Parse(` +# HELP {{.name}} This is a histogram with default buckets +# TYPE {{.name}} histogram +`)).Execute(b, data) + } + template.Must(template.New("").Parse(` +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.005"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.01"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.025"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.05"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.1"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.25"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.5"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="1"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="2.5"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="5"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="10"} 1 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="+Inf"} 1 +{{.name}}_sum{address="0.0.0.0",port="5001"} 10 +{{.name}}_count{address="0.0.0.0",port="5001"} 1 +`)).Execute(b, data) + return b.String() + } + genTestCounterProto := func(name string, value int) string { + return fmt.Sprintf(` +name: "%s" +help: "some help text" +type: COUNTER +metric: < + label: < + name: "address" + value: "0.0.0.0" + > + label: < + name: "port" + value: "5001" + > + counter: < + value: %d + > +> +`, name, value) + } + genTestHistProto := func(name string, hasClassic, hasExponential bool) string { + var classic string + if hasClassic { + classic = ` +bucket: < + cumulative_count: 0 + upper_bound: 0.005 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.01 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.025 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.05 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.1 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.25 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.5 +> +bucket: < + cumulative_count: 0 + upper_bound: 1 +> +bucket: < + cumulative_count: 0 + upper_bound: 2.5 +> +bucket: < + cumulative_count: 0 + upper_bound: 5 +> +bucket: < + cumulative_count: 1 + upper_bound: 10 +>` + } + var expo string + if hasExponential { + expo = ` +schema: 3 +zero_threshold: 2.938735877055719e-39 +zero_count: 0 +positive_span: < + offset: 2 + length: 1 +> +positive_delta: 1` + } + return fmt.Sprintf(` +name: "%s" +help: "This is a histogram with default buckets" +type: HISTOGRAM +metric: < + label: < + name: "address" + value: "0.0.0.0" + > + label: < + name: "port" + value: "5001" + > + histogram: < + sample_count: 1 + sample_sum: 10 + %s + %s + > + timestamp_ms: 1234568 +> +`, name, classic, expo) + } + + metricsTexts := map[string]struct { + text []string + contentType string + hasClassic bool + hasExponential bool + }{ + "text": { + text: []string{ + genTestCounterText("test_metric_1", 1, true), + genTestCounterText("test_metric_1_count", 1, true), + genTestCounterText("test_metric_1_sum", 1, true), + genTestCounterText("test_metric_1_bucket", 1, true), + genTestHistText("test_histogram_1", true), + genTestCounterText("test_metric_2", 1, true), + genTestCounterText("test_metric_2_count", 1, true), + genTestCounterText("test_metric_2_sum", 1, true), + genTestCounterText("test_metric_2_bucket", 1, true), + genTestHistText("test_histogram_2", true), + genTestCounterText("test_metric_3", 1, true), + genTestCounterText("test_metric_3_count", 1, true), + genTestCounterText("test_metric_3_sum", 1, true), + genTestCounterText("test_metric_3_bucket", 1, true), + genTestHistText("test_histogram_3", true), + }, + hasClassic: true, + }, + "text, in different order": { + text: []string{ + genTestCounterText("test_metric_1", 1, true), + genTestCounterText("test_metric_1_count", 1, true), + genTestCounterText("test_metric_1_sum", 1, true), + genTestCounterText("test_metric_1_bucket", 1, true), + genTestHistText("test_histogram_1", true), + genTestCounterText("test_metric_2", 1, true), + genTestCounterText("test_metric_2_count", 1, true), + genTestCounterText("test_metric_2_sum", 1, true), + genTestCounterText("test_metric_2_bucket", 1, true), + genTestHistText("test_histogram_2", true), + genTestHistText("test_histogram_3", true), + genTestCounterText("test_metric_3", 1, true), + genTestCounterText("test_metric_3_count", 1, true), + genTestCounterText("test_metric_3_sum", 1, true), + genTestCounterText("test_metric_3_bucket", 1, true), + }, + hasClassic: true, + }, + "protobuf": { + text: []string{ + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_1", true, false), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_2", true, false), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + genTestHistProto("test_histogram_3", true, false), + }, + contentType: "application/vnd.google.protobuf", + hasClassic: true, + }, + "protobuf, in different order": { + text: []string{ + genTestHistProto("test_histogram_1", true, false), + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_2", true, false), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_3", true, false), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + }, + contentType: "application/vnd.google.protobuf", + hasClassic: true, + }, + "protobuf, with additional native exponential histogram": { + text: []string{ + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_1", true, true), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_2", true, true), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + genTestHistProto("test_histogram_3", true, true), + }, + contentType: "application/vnd.google.protobuf", + hasClassic: true, + hasExponential: true, + }, + "protobuf, with only native exponential histogram": { + text: []string{ + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_1", false, true), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_2", false, true), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + genTestHistProto("test_histogram_3", false, true), + }, + contentType: "application/vnd.google.protobuf", + hasExponential: true, + }, + } + + checkBucketValues := func(expectedCount int, series storage.SeriesSet) { + labelName := "le" + var expectedValues []string + if expectedCount > 0 { + expectedValues = []string{"0.005", "0.01", "0.025", "0.05", "0.1", "0.25", "0.5", "1.0", "2.5", "5.0", "10.0", "+Inf"} + } + foundLeValues := map[string]bool{} + + for series.Next() { + s := series.At() + v := s.Labels().Get(labelName) + require.NotContains(t, foundLeValues, v, "duplicate label value found") + foundLeValues[v] = true + } + + require.Equal(t, len(expectedValues), len(foundLeValues), "unexpected number of label values, expected %v but found %v", expectedValues, foundLeValues) + for _, v := range expectedValues { + require.Contains(t, foundLeValues, v, "label value not found") + } + } + + // Checks that the expected series is present and runs a basic sanity check of the float values. + checkFloatSeries := func(series storage.SeriesSet, expectedCount int, expectedFloat float64) { + count := 0 + for series.Next() { + i := series.At().Iterator(nil) + loop: + for { + switch i.Next() { + case chunkenc.ValNone: + break loop + case chunkenc.ValFloat: + _, f := i.At() + require.Equal(t, expectedFloat, f) + case chunkenc.ValHistogram: + panic("unexpected value type: histogram") + case chunkenc.ValFloatHistogram: + panic("unexpected value type: float histogram") + default: + panic("unexpected value type") + } + } + count++ + } + require.Equal(t, expectedCount, count, "number of float series not as expected") + } + + // Checks that the expected series is present and runs a basic sanity check of the histogram values. + checkHistSeries := func(series storage.SeriesSet, expectedCount int, expectedSchema int32) { + count := 0 + for series.Next() { + i := series.At().Iterator(nil) + loop: + for { + switch i.Next() { + case chunkenc.ValNone: + break loop + case chunkenc.ValFloat: + panic("unexpected value type: float") + case chunkenc.ValHistogram: + _, h := i.AtHistogram(nil) + require.Equal(t, expectedSchema, h.Schema) + require.Equal(t, uint64(1), h.Count) + require.Equal(t, 10.0, h.Sum) + case chunkenc.ValFloatHistogram: + _, h := i.AtFloatHistogram(nil) + require.Equal(t, expectedSchema, h.Schema) + require.Equal(t, uint64(1), h.Count) + require.Equal(t, 10.0, h.Sum) + default: + panic("unexpected value type") + } + } + count++ + } + require.Equal(t, expectedCount, count, "number of histogram series not as expected") + } + + for metricsTextName, metricsText := range metricsTexts { + for name, tc := range map[string]struct { + alwaysScrapeClassicHistograms bool + convertClassicHistToNHCB bool + }{ + "convert with scrape": { + alwaysScrapeClassicHistograms: true, + convertClassicHistToNHCB: true, + }, + "convert without scrape": { + alwaysScrapeClassicHistograms: false, + convertClassicHistToNHCB: true, + }, + "scrape without convert": { + alwaysScrapeClassicHistograms: true, + convertClassicHistToNHCB: false, + }, + "neither scrape nor convert": { + alwaysScrapeClassicHistograms: false, + convertClassicHistToNHCB: false, + }, + } { + var expectedClassicHistCount, expectedNativeHistCount int + var expectCustomBuckets bool + if metricsText.hasExponential { + expectedNativeHistCount = 1 + expectCustomBuckets = false + expectedClassicHistCount = 0 + if metricsText.hasClassic && tc.alwaysScrapeClassicHistograms { + expectedClassicHistCount = 1 + } + } else if metricsText.hasClassic { + switch { + case tc.alwaysScrapeClassicHistograms && tc.convertClassicHistToNHCB: + expectedClassicHistCount = 1 + expectedNativeHistCount = 1 + expectCustomBuckets = true + case !tc.alwaysScrapeClassicHistograms && tc.convertClassicHistToNHCB: + expectedClassicHistCount = 0 + expectedNativeHistCount = 1 + expectCustomBuckets = true + case !tc.convertClassicHistToNHCB: + expectedClassicHistCount = 1 + expectedNativeHistCount = 0 + } + } + + t.Run(fmt.Sprintf("%s with %s", name, metricsTextName), func(t *testing.T) { + simpleStorage := teststorage.New(t) + defer simpleStorage.Close() + + config := &config.ScrapeConfig{ + JobName: "test", + SampleLimit: 100, + Scheme: "http", + ScrapeInterval: model.Duration(50 * time.Millisecond), + ScrapeTimeout: model.Duration(25 * time.Millisecond), + AlwaysScrapeClassicHistograms: tc.alwaysScrapeClassicHistograms, + ConvertClassicHistogramsToNHCB: tc.convertClassicHistToNHCB, + } + + scrapeCount := 0 + scraped := make(chan bool) + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if metricsText.contentType != "" { + w.Header().Set("Content-Type", `application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited`) + for _, text := range metricsText.text { + buf := &bytes.Buffer{} + // In case of protobuf, we have to create the binary representation. + pb := &dto.MetricFamily{} + // From text to proto message. + require.NoError(t, proto.UnmarshalText(text, pb)) + // From proto message to binary protobuf. + protoBuf, err := proto.Marshal(pb) + require.NoError(t, err) + + // Write first length, then binary protobuf. + varintBuf := binary.AppendUvarint(nil, uint64(len(protoBuf))) + buf.Write(varintBuf) + buf.Write(protoBuf) + w.Write(buf.Bytes()) + } + } else { + for _, text := range metricsText.text { + fmt.Fprint(w, text) + } + } + scrapeCount++ + if scrapeCount > 2 { + close(scraped) + } + })) + defer ts.Close() + + sp, err := newScrapePool(config, simpleStorage, 0, nil, nil, &Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond), EnableNativeHistogramsIngestion: true}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + require.Len(t, sp.ActiveTargets(), 1) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped") + case <-scraped: + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := simpleStorage.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + + var series storage.SeriesSet + + for i := 1; i <= 3; i++ { + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d_count", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d_sum", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d_bucket", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_count", i))) + checkFloatSeries(series, expectedClassicHistCount, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_sum", i))) + checkFloatSeries(series, expectedClassicHistCount, 10.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_bucket", i))) + checkBucketValues(expectedClassicHistCount, series) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d", i))) + + var expectedSchema int32 + if expectCustomBuckets { + expectedSchema = histogram.CustomBucketsSchema + } else { + expectedSchema = 3 + } + checkHistSeries(series, expectedNativeHistCount, expectedSchema) + } + }) + } + } +} + func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t *testing.T) { appender := &collectResultAppender{} var ( @@ -3506,7 +4204,6 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t * case <-time.After(5 * time.Second): t.Fatalf("Scrape wasn't stopped.") } - // 1 successfully scraped sample, 1 stale marker after first fail, 5 report samples for // each scrape successful or not. require.Len(t, appender.resultFloats, 27, "Appended samples not as expected:\n%s", appender) @@ -3835,7 +4532,7 @@ scrape_configs: mng, err := NewManager(&Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond), EnableNativeHistogramsIngestion: true}, nil, nil, s, reg) require.NoError(t, err) - cfg, err := config.Load(configStr, false, promslog.NewNopLogger()) + cfg, err := config.Load(configStr, promslog.NewNopLogger()) require.NoError(t, err) mng.ApplyConfig(cfg) tsets := make(chan map[string][]*targetgroup.Group) diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index 1c099932b..305146993 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -24,16 +24,16 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install Go - uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 + uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed # v5.1.0 with: go-version: 1.23.x - name: Install snmp_exporter/generator dependencies run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' - name: Lint - uses: golangci/golangci-lint-action@aaa42aa0628b4ae2578232a66b541047968fac86 # v6.1.0 + uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1 with: args: --verbose - version: v1.60.2 + version: v1.61.0 diff --git a/storage/buffer.go b/storage/buffer.go index ad504ad5d..e847c10e6 100644 --- a/storage/buffer.go +++ b/storage/buffer.go @@ -626,7 +626,7 @@ func addF(s fSample, buf []fSample, r *sampleRing) []fSample { return buf } -// addF adds an hSample to a (specialized) hSample buffer. +// addH adds an hSample to a (specialized) hSample buffer. func addH(s hSample, buf []hSample, r *sampleRing) []hSample { l := len(buf) // Grow the ring buffer if it fits no more elements. diff --git a/storage/fanout.go b/storage/fanout.go index 6ff517895..4d076788a 100644 --- a/storage/fanout.go +++ b/storage/fanout.go @@ -147,6 +147,16 @@ type fanoutAppender struct { secondaries []Appender } +// SetOptions propagates the hints to both primary and secondary appenders. +func (f *fanoutAppender) SetOptions(opts *AppendOptions) { + if f.primary != nil { + f.primary.SetOptions(opts) + } + for _, appender := range f.secondaries { + appender.SetOptions(opts) + } +} + func (f *fanoutAppender) Append(ref SeriesRef, l labels.Labels, t int64, v float64) (SeriesRef, error) { ref, err := f.primary.Append(ref, l, t, v) if err != nil { diff --git a/storage/interface.go b/storage/interface.go index b7ef14ce9..32b90cc10 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -41,17 +41,17 @@ var ( ErrOutOfOrderExemplar = errors.New("out of order exemplar") ErrDuplicateExemplar = errors.New("duplicate exemplar") ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength) - ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0") - ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled") - ErrOOONativeHistogramsDisabled = fmt.Errorf("out-of-order native histogram ingestion is disabled") + ErrExemplarsDisabled = errors.New("exemplar storage is disabled or max exemplars is less than or equal to 0") + ErrNativeHistogramsDisabled = errors.New("native histograms are disabled") + ErrOOONativeHistogramsDisabled = errors.New("out-of-order native histogram ingestion is disabled") // ErrOutOfOrderCT indicates failed append of CT to the storage // due to CT being older the then newer sample. // NOTE(bwplotka): This can be both an instrumentation failure or commonly expected // behaviour, and we currently don't have a way to determine this. As a result // it's recommended to ignore this error for now. - ErrOutOfOrderCT = fmt.Errorf("created timestamp out of order, ignoring") - ErrCTNewerThanSample = fmt.Errorf("CT is newer or the same as sample's timestamp, ignoring") + ErrOutOfOrderCT = errors.New("created timestamp out of order, ignoring") + ErrCTNewerThanSample = errors.New("CT is newer or the same as sample's timestamp, ignoring") ) // SeriesRef is a generic series reference. In prometheus it is either a @@ -243,6 +243,10 @@ func (f QueryableFunc) Querier(mint, maxt int64) (Querier, error) { return f(mint, maxt) } +type AppendOptions struct { + DiscardOutOfOrder bool +} + // Appender provides batched appends against a storage. // It must be completed with a call to Commit or Rollback and must not be reused afterwards. // @@ -271,6 +275,10 @@ type Appender interface { // Appender has to be discarded after rollback. Rollback() error + // SetOptions configures the appender with specific append options such as + // discarding out-of-order samples even if out-of-order is enabled in the TSDB. + SetOptions(opts *AppendOptions) + ExemplarAppender HistogramAppender MetadataUpdater diff --git a/storage/merge.go b/storage/merge.go index 2424b26ab..a4d0934b1 100644 --- a/storage/merge.go +++ b/storage/merge.go @@ -153,13 +153,18 @@ func (q *mergeGenericQuerier) Select(ctx context.Context, sortSeries bool, hints ) // Schedule all Selects for all queriers we know about. for _, querier := range q.queriers { + // copy the matchers as some queriers may alter the slice. + // See https://github.com/prometheus/prometheus/issues/14723 + matchersCopy := make([]*labels.Matcher, len(matchers)) + copy(matchersCopy, matchers) + wg.Add(1) - go func(qr genericQuerier) { + go func(qr genericQuerier, m []*labels.Matcher) { defer wg.Done() // We need to sort for NewMergeSeriesSet to work. - seriesSetChan <- qr.Select(ctx, true, hints, matchers...) - }(querier) + seriesSetChan <- qr.Select(ctx, true, hints, m...) + }(querier, matchersCopy) } go func() { wg.Wait() diff --git a/storage/remote/azuread/azuread.go b/storage/remote/azuread/azuread.go index 82f46b82f..20ec53d6f 100644 --- a/storage/remote/azuread/azuread.go +++ b/storage/remote/azuread/azuread.go @@ -16,7 +16,6 @@ package azuread import ( "context" "errors" - "fmt" "net/http" "strings" "sync" @@ -110,55 +109,55 @@ func (c *AzureADConfig) Validate() error { } if c.Cloud != AzureChina && c.Cloud != AzureGovernment && c.Cloud != AzurePublic { - return fmt.Errorf("must provide a cloud in the Azure AD config") + return errors.New("must provide a cloud in the Azure AD config") } if c.ManagedIdentity == nil && c.OAuth == nil && c.SDK == nil { - return fmt.Errorf("must provide an Azure Managed Identity, Azure OAuth or Azure SDK in the Azure AD config") + return errors.New("must provide an Azure Managed Identity, Azure OAuth or Azure SDK in the Azure AD config") } if c.ManagedIdentity != nil && c.OAuth != nil { - return fmt.Errorf("cannot provide both Azure Managed Identity and Azure OAuth in the Azure AD config") + return errors.New("cannot provide both Azure Managed Identity and Azure OAuth in the Azure AD config") } if c.ManagedIdentity != nil && c.SDK != nil { - return fmt.Errorf("cannot provide both Azure Managed Identity and Azure SDK in the Azure AD config") + return errors.New("cannot provide both Azure Managed Identity and Azure SDK in the Azure AD config") } if c.OAuth != nil && c.SDK != nil { - return fmt.Errorf("cannot provide both Azure OAuth and Azure SDK in the Azure AD config") + return errors.New("cannot provide both Azure OAuth and Azure SDK in the Azure AD config") } if c.ManagedIdentity != nil { if c.ManagedIdentity.ClientID == "" { - return fmt.Errorf("must provide an Azure Managed Identity client_id in the Azure AD config") + return errors.New("must provide an Azure Managed Identity client_id in the Azure AD config") } _, err := uuid.Parse(c.ManagedIdentity.ClientID) if err != nil { - return fmt.Errorf("the provided Azure Managed Identity client_id is invalid") + return errors.New("the provided Azure Managed Identity client_id is invalid") } } if c.OAuth != nil { if c.OAuth.ClientID == "" { - return fmt.Errorf("must provide an Azure OAuth client_id in the Azure AD config") + return errors.New("must provide an Azure OAuth client_id in the Azure AD config") } if c.OAuth.ClientSecret == "" { - return fmt.Errorf("must provide an Azure OAuth client_secret in the Azure AD config") + return errors.New("must provide an Azure OAuth client_secret in the Azure AD config") } if c.OAuth.TenantID == "" { - return fmt.Errorf("must provide an Azure OAuth tenant_id in the Azure AD config") + return errors.New("must provide an Azure OAuth tenant_id in the Azure AD config") } var err error _, err = uuid.Parse(c.OAuth.ClientID) if err != nil { - return fmt.Errorf("the provided Azure OAuth client_id is invalid") + return errors.New("the provided Azure OAuth client_id is invalid") } _, err = regexp.MatchString("^[0-9a-zA-Z-.]+$", c.OAuth.TenantID) if err != nil { - return fmt.Errorf("the provided Azure OAuth tenant_id is invalid") + return errors.New("the provided Azure OAuth tenant_id is invalid") } } @@ -168,7 +167,7 @@ func (c *AzureADConfig) Validate() error { if c.SDK.TenantID != "" { _, err = regexp.MatchString("^[0-9a-zA-Z-.]+$", c.SDK.TenantID) if err != nil { - return fmt.Errorf("the provided Azure OAuth tenant_id is invalid") + return errors.New("the provided Azure OAuth tenant_id is invalid") } } } diff --git a/storage/remote/client.go b/storage/remote/client.go index 62218cfba..23775122e 100644 --- a/storage/remote/client.go +++ b/storage/remote/client.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "net/http" + "net/http/httptrace" "strconv" "strings" "time" @@ -31,6 +32,7 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/common/sigv4" "github.com/prometheus/common/version" + "go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/trace" @@ -213,8 +215,11 @@ func NewWriteClient(name string, conf *ClientConfig) (WriteClient, error) { if conf.WriteProtoMsg != "" { writeProtoMsg = conf.WriteProtoMsg } - - httpClient.Transport = otelhttp.NewTransport(t) + httpClient.Transport = otelhttp.NewTransport( + t, + otelhttp.WithClientTrace(func(ctx context.Context) *httptrace.ClientTrace { + return otelhttptrace.NewClientTrace(ctx, otelhttptrace.WithoutSubSpans()) + })) return &Client{ remoteName: name, urlString: conf.URL.String(), diff --git a/storage/remote/otlptranslator/prometheus/helpers_from_stdlib.go b/storage/remote/otlptranslator/prometheus/helpers_from_stdlib.go new file mode 100644 index 000000000..cb9257d07 --- /dev/null +++ b/storage/remote/otlptranslator/prometheus/helpers_from_stdlib.go @@ -0,0 +1,106 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/golang/go/blob/f2d118fd5f7e872804a5825ce29797f81a28b0fa/src/strings/strings.go +// Provenance-includes-license: BSD-3-Clause +// Provenance-includes-copyright: Copyright The Go Authors. + +package prometheus + +import "strings" + +// fieldsFunc is a copy of strings.FieldsFunc from the Go standard library, +// but it also returns the separators as part of the result. +func fieldsFunc(s string, f func(rune) bool) ([]string, []string) { + // A span is used to record a slice of s of the form s[start:end]. + // The start index is inclusive and the end index is exclusive. + type span struct { + start int + end int + } + spans := make([]span, 0, 32) + separators := make([]string, 0, 32) + + // Find the field start and end indices. + // Doing this in a separate pass (rather than slicing the string s + // and collecting the result substrings right away) is significantly + // more efficient, possibly due to cache effects. + start := -1 // valid span start if >= 0 + for end, rune := range s { + if f(rune) { + if start >= 0 { + spans = append(spans, span{start, end}) + // Set start to a negative value. + // Note: using -1 here consistently and reproducibly + // slows down this code by a several percent on amd64. + start = ^start + separators = append(separators, string(s[end])) + } + } else { + if start < 0 { + start = end + } + } + } + + // Last field might end at EOF. + if start >= 0 { + spans = append(spans, span{start, len(s)}) + } + + // Create strings from recorded field indices. + a := make([]string, len(spans)) + for i, span := range spans { + a[i] = s[span.start:span.end] + } + + return a, separators +} + +// join is a copy of strings.Join from the Go standard library, +// but it also accepts a slice of separators to join the elements with. +// If the slice of separators is shorter than the slice of elements, use a default value. +// We also don't check for integer overflow. +func join(elems []string, separators []string, def string) string { + switch len(elems) { + case 0: + return "" + case 1: + return elems[0] + } + + var n int + var sep string + sepLen := len(separators) + for i, elem := range elems { + if i >= sepLen { + sep = def + } else { + sep = separators[i] + } + n += len(sep) + len(elem) + } + + var b strings.Builder + b.Grow(n) + b.WriteString(elems[0]) + for i, s := range elems[1:] { + if i >= sepLen { + sep = def + } else { + sep = separators[i] + } + b.WriteString(sep) + b.WriteString(s) + } + return b.String() +} diff --git a/storage/remote/otlptranslator/prometheus/normalize_label.go b/storage/remote/otlptranslator/prometheus/normalize_label.go index a112b9bbc..b928e6888 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label.go @@ -19,6 +19,8 @@ package prometheus import ( "strings" "unicode" + + "github.com/prometheus/prometheus/util/strutil" ) // Normalizes the specified label to follow Prometheus label names standard. @@ -26,16 +28,14 @@ import ( // See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels. // // Labels that start with non-letter rune will be prefixed with "key_". -// // An exception is made for double-underscores which are allowed. -func NormalizeLabel(label string) string { +func NormalizeLabel(label string, allowUTF8 bool) string { // Trivial case - if len(label) == 0 { + if len(label) == 0 || allowUTF8 { return label } - // Replace all non-alphanumeric runes with underscores - label = strings.Map(sanitizeRune, label) + label = strutil.SanitizeLabelName(label) // If label starts with a number, prepend with "key_" if unicode.IsDigit(rune(label[0])) { @@ -46,11 +46,3 @@ func NormalizeLabel(label string) string { return label } - -// Return '_' for anything non-alphanumeric. -func sanitizeRune(r rune) rune { - if unicode.IsLetter(r) || unicode.IsDigit(r) { - return r - } - return '_' -} diff --git a/storage/remote/otlptranslator/prometheus/normalize_label_test.go b/storage/remote/otlptranslator/prometheus/normalize_label_test.go new file mode 100644 index 000000000..19ab6cd17 --- /dev/null +++ b/storage/remote/otlptranslator/prometheus/normalize_label_test.go @@ -0,0 +1,48 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package prometheus + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNormalizeLabel(t *testing.T) { + tests := []struct { + label string + expected string + expectedUTF8 string + }{ + {"", "", ""}, + {"label:with:colons", "label_with_colons", "label:with:colons"}, // Without UTF-8 support, colons are only allowed in metric names + {"LabelWithCapitalLetters", "LabelWithCapitalLetters", "LabelWithCapitalLetters"}, + {"label!with&special$chars)", "label_with_special_chars_", "label!with&special$chars)"}, + {"label_with_foreign_characters_字符", "label_with_foreign_characters___", "label_with_foreign_characters_字符"}, + {"label.with.dots", "label_with_dots", "label.with.dots"}, + {"123label", "key_123label", "123label"}, + {"_label_starting_with_underscore", "key_label_starting_with_underscore", "_label_starting_with_underscore"}, + {"__label_starting_with_2underscores", "__label_starting_with_2underscores", "__label_starting_with_2underscores"}, + } + + for i, test := range tests { + t.Run(fmt.Sprintf("test_%d", i), func(t *testing.T) { + result := NormalizeLabel(test.label, false) + require.Equal(t, test.expected, result) + uTF8result := NormalizeLabel(test.label, true) + require.Equal(t, test.expectedUTF8, uTF8result) + }) + } +} diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index 0f472b80a..335705aa8 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -17,9 +17,12 @@ package prometheus import ( + "regexp" + "slices" "strings" "unicode" + "github.com/prometheus/prometheus/util/strutil" "go.opentelemetry.io/collector/pdata/pmetric" ) @@ -84,38 +87,52 @@ var perUnitMap = map[string]string{ // // See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels, // https://prometheus.io/docs/practices/naming/#metric-and-label-naming -// and https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. -func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffixes bool) string { - var metricName string - +// and https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. +func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffixes, allowUTF8 bool) string { // Full normalization following standard Prometheus naming conventions if addMetricSuffixes { - return normalizeName(metric, namespace) + return normalizeName(metric, namespace, allowUTF8) } - // Simple case (no full normalization, no units, etc.), we simply trim out forbidden chars - metricName = RemovePromForbiddenRunes(metric.Name()) + var metricName string + if !allowUTF8 { + // Regexp for metric name characters that should be replaced with _. + invalidMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:_]`) + + // Simple case (no full normalization, no units, etc.). + metricName = strings.Join(strings.FieldsFunc(metric.Name(), func(r rune) bool { + return invalidMetricCharRE.MatchString(string(r)) + }), "_") + } else { + metricName = metric.Name() + } // Namespace? if namespace != "" { return namespace + "_" + metricName } - // Metric name starts with a digit? Prefix it with an underscore - if metricName != "" && unicode.IsDigit(rune(metricName[0])) { + // Metric name starts with a digit and utf8 not allowed? Prefix it with an underscore. + if metricName != "" && unicode.IsDigit(rune(metricName[0])) && !allowUTF8 { metricName = "_" + metricName } return metricName } -// Build a normalized name for the specified metric -func normalizeName(metric pmetric.Metric, namespace string) string { - // Split metric name into "tokens" (remove all non-alphanumerics) - nameTokens := strings.FieldsFunc( - metric.Name(), - func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) }, - ) +// Build a normalized name for the specified metric. +func normalizeName(metric pmetric.Metric, namespace string, allowUTF8 bool) string { + var translationFunc func(rune) bool + if !allowUTF8 { + nonTokenMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:]`) + translationFunc = func(r rune) bool { return nonTokenMetricCharRE.MatchString(string(r)) } + } else { + translationFunc = func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != ':' } + } + // Split metric name into "tokens" (of supported metric name runes). + // Note that this has the side effect of replacing multiple consecutive underscores with a single underscore. + // This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. + nameTokens, separators := fieldsFunc(metric.Name(), translationFunc) // Split unit at the '/' if any unitTokens := strings.SplitN(metric.Unit(), "/", 2) @@ -123,11 +140,15 @@ func normalizeName(metric pmetric.Metric, namespace string) string { // Main unit // Append if not blank, doesn't contain '{}', and is not present in metric name already if len(unitTokens) > 0 { + var mainUnitProm, perUnitProm string mainUnitOTel := strings.TrimSpace(unitTokens[0]) if mainUnitOTel != "" && !strings.ContainsAny(mainUnitOTel, "{}") { - mainUnitProm := CleanUpString(unitMapGetOrDefault(mainUnitOTel)) - if mainUnitProm != "" && !contains(nameTokens, mainUnitProm) { - nameTokens = append(nameTokens, mainUnitProm) + mainUnitProm = unitMapGetOrDefault(mainUnitOTel) + if !allowUTF8 { + mainUnitProm = cleanUpUnit(mainUnitProm) + } + if slices.Contains(nameTokens, mainUnitProm) { + mainUnitProm = "" } } @@ -136,13 +157,29 @@ func normalizeName(metric pmetric.Metric, namespace string) string { if len(unitTokens) > 1 && unitTokens[1] != "" { perUnitOTel := strings.TrimSpace(unitTokens[1]) if perUnitOTel != "" && !strings.ContainsAny(perUnitOTel, "{}") { - perUnitProm := CleanUpString(perUnitMapGetOrDefault(perUnitOTel)) - if perUnitProm != "" && !contains(nameTokens, perUnitProm) { - nameTokens = append(nameTokens, "per", perUnitProm) + perUnitProm = perUnitMapGetOrDefault(perUnitOTel) + if !allowUTF8 { + perUnitProm = cleanUpUnit(perUnitProm) + } + } + if perUnitProm != "" { + perUnitProm = "per_" + perUnitProm + if slices.Contains(nameTokens, perUnitProm) { + perUnitProm = "" } } } + if perUnitProm != "" { + mainUnitProm = strings.TrimSuffix(mainUnitProm, "_") + } + + if mainUnitProm != "" { + nameTokens = append(nameTokens, mainUnitProm) + } + if perUnitProm != "" { + nameTokens = append(nameTokens, perUnitProm) + } } // Append _total for Counters @@ -164,8 +201,12 @@ func normalizeName(metric pmetric.Metric, namespace string) string { nameTokens = append([]string{namespace}, nameTokens...) } - // Build the string from the tokens, separated with underscores - normalizedName := strings.Join(nameTokens, "_") + // Build the string from the tokens + separators. + // If UTF-8 isn't allowed, we'll use underscores as separators. + if !allowUTF8 { + separators = []string{} + } + normalizedName := join(nameTokens, separators, "_") // Metric name cannot start with a digit, so prefix it with "_" in this case if normalizedName != "" && unicode.IsDigit(rune(normalizedName[0])) { @@ -235,13 +276,15 @@ func removeSuffix(tokens []string, suffix string) []string { return tokens } -// Clean up specified string so it's Prometheus compliant -func CleanUpString(s string) string { - return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) }), "_") -} - -func RemovePromForbiddenRunes(s string) string { - return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' && r != ':' }), "_") +// cleanUpUnit cleans up unit so it matches model.LabelNameRE. +func cleanUpUnit(unit string) string { + // Multiple consecutive underscores are replaced with a single underscore. + // This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. + multipleUnderscoresRE := regexp.MustCompile(`__+`) + return strings.TrimPrefix(multipleUnderscoresRE.ReplaceAllString( + strutil.SanitizeLabelName(unit), + "_", + ), "_") } // Retrieve the Prometheus "basic" unit corresponding to the specified "basic" unit @@ -262,16 +305,6 @@ func perUnitMapGetOrDefault(perUnit string) string { return perUnit } -// Returns whether the slice contains the specified value -func contains(slice []string, value string) bool { - for _, sliceEntry := range slice { - if sliceEntry == value { - return true - } - } - return false -} - // Remove the specified value from the slice func removeItem(slice []string, value string) []string { newSlice := make([]string, 0, len(slice)) diff --git a/storage/remote/otlptranslator/prometheus/normalize_name_test.go b/storage/remote/otlptranslator/prometheus/normalize_name_test.go index 07b9b0a78..d97e7a560 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name_test.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name_test.go @@ -25,92 +25,119 @@ import ( ) func TestByte(t *testing.T) { - require.Equal(t, "system_filesystem_usage_bytes", normalizeName(createGauge("system.filesystem.usage", "By"), "")) + require.Equal(t, "system_filesystem_usage_bytes", normalizeName(createGauge("system.filesystem.usage", "By"), "", false)) } func TestByteCounter(t *testing.T) { - require.Equal(t, "system_io_bytes_total", normalizeName(createCounter("system.io", "By"), "")) - require.Equal(t, "network_transmitted_bytes_total", normalizeName(createCounter("network_transmitted_bytes_total", "By"), "")) + require.Equal(t, "system_io_bytes_total", normalizeName(createCounter("system.io", "By"), "", false)) + require.Equal(t, "network_transmitted_bytes_total", normalizeName(createCounter("network_transmitted_bytes_total", "By"), "", false)) } func TestWhiteSpaces(t *testing.T) { - require.Equal(t, "system_filesystem_usage_bytes", normalizeName(createGauge("\t system.filesystem.usage ", " By\t"), "")) + require.Equal(t, "system_filesystem_usage_bytes", normalizeName(createGauge("\t system.filesystem.usage ", " By\t"), "", false)) } func TestNonStandardUnit(t *testing.T) { - require.Equal(t, "system_network_dropped", normalizeName(createGauge("system.network.dropped", "{packets}"), "")) + require.Equal(t, "system_network_dropped", normalizeName(createGauge("system.network.dropped", "{packets}"), "", false)) } func TestNonStandardUnitCounter(t *testing.T) { - require.Equal(t, "system_network_dropped_total", normalizeName(createCounter("system.network.dropped", "{packets}"), "")) + require.Equal(t, "system_network_dropped_total", normalizeName(createCounter("system.network.dropped", "{packets}"), "", false)) } func TestBrokenUnit(t *testing.T) { - require.Equal(t, "system_network_dropped_packets", normalizeName(createGauge("system.network.dropped", "packets"), "")) - require.Equal(t, "system_network_packets_dropped", normalizeName(createGauge("system.network.packets.dropped", "packets"), "")) - require.Equal(t, "system_network_packets", normalizeName(createGauge("system.network.packets", "packets"), "")) + require.Equal(t, "system_network_dropped_packets", normalizeName(createGauge("system.network.dropped", "packets"), "", false)) + require.Equal(t, "system_network_packets_dropped", normalizeName(createGauge("system.network.packets.dropped", "packets"), "", false)) + require.Equal(t, "system_network_packets", normalizeName(createGauge("system.network.packets", "packets"), "", false)) } func TestBrokenUnitCounter(t *testing.T) { - require.Equal(t, "system_network_dropped_packets_total", normalizeName(createCounter("system.network.dropped", "packets"), "")) - require.Equal(t, "system_network_packets_dropped_total", normalizeName(createCounter("system.network.packets.dropped", "packets"), "")) - require.Equal(t, "system_network_packets_total", normalizeName(createCounter("system.network.packets", "packets"), "")) + require.Equal(t, "system_network_dropped_packets_total", normalizeName(createCounter("system.network.dropped", "packets"), "", false)) + require.Equal(t, "system_network_packets_dropped_total", normalizeName(createCounter("system.network.packets.dropped", "packets"), "", false)) + require.Equal(t, "system_network_packets_total", normalizeName(createCounter("system.network.packets", "packets"), "", false)) } func TestRatio(t *testing.T) { - require.Equal(t, "hw_gpu_memory_utilization_ratio", normalizeName(createGauge("hw.gpu.memory.utilization", "1"), "")) - require.Equal(t, "hw_fan_speed_ratio", normalizeName(createGauge("hw.fan.speed_ratio", "1"), "")) - require.Equal(t, "objects_total", normalizeName(createCounter("objects", "1"), "")) + require.Equal(t, "hw_gpu_memory_utilization_ratio", normalizeName(createGauge("hw.gpu.memory.utilization", "1"), "", false)) + require.Equal(t, "hw_fan_speed_ratio", normalizeName(createGauge("hw.fan.speed_ratio", "1"), "", false)) + require.Equal(t, "objects_total", normalizeName(createCounter("objects", "1"), "", false)) } func TestHertz(t *testing.T) { - require.Equal(t, "hw_cpu_speed_limit_hertz", normalizeName(createGauge("hw.cpu.speed_limit", "Hz"), "")) + require.Equal(t, "hw_cpu_speed_limit_hertz", normalizeName(createGauge("hw.cpu.speed_limit", "Hz"), "", false)) } func TestPer(t *testing.T) { - require.Equal(t, "broken_metric_speed_km_per_hour", normalizeName(createGauge("broken.metric.speed", "km/h"), "")) - require.Equal(t, "astro_light_speed_limit_meters_per_second", normalizeName(createGauge("astro.light.speed_limit", "m/s"), "")) + require.Equal(t, "broken_metric_speed_km_per_hour", normalizeName(createGauge("broken.metric.speed", "km/h"), "", false)) + require.Equal(t, "astro_light_speed_limit_meters_per_second", normalizeName(createGauge("astro.light.speed_limit", "m/s"), "", false)) } func TestPercent(t *testing.T) { - require.Equal(t, "broken_metric_success_ratio_percent", normalizeName(createGauge("broken.metric.success_ratio", "%"), "")) - require.Equal(t, "broken_metric_success_percent", normalizeName(createGauge("broken.metric.success_percent", "%"), "")) + require.Equal(t, "broken_metric_success_ratio_percent", normalizeName(createGauge("broken.metric.success_ratio", "%"), "", false)) + require.Equal(t, "broken_metric_success_percent", normalizeName(createGauge("broken.metric.success_percent", "%"), "", false)) } func TestEmpty(t *testing.T) { - require.Equal(t, "test_metric_no_unit", normalizeName(createGauge("test.metric.no_unit", ""), "")) - require.Equal(t, "test_metric_spaces", normalizeName(createGauge("test.metric.spaces", " \t "), "")) + require.Equal(t, "test_metric_no_unit", normalizeName(createGauge("test.metric.no_unit", ""), "", false)) + require.Equal(t, "test_metric_spaces", normalizeName(createGauge("test.metric.spaces", " \t "), "", false)) } -func TestUnsupportedRunes(t *testing.T) { - require.Equal(t, "unsupported_metric_temperature_F", normalizeName(createGauge("unsupported.metric.temperature", "°F"), "")) - require.Equal(t, "unsupported_metric_weird", normalizeName(createGauge("unsupported.metric.weird", "+=.:,!* & #"), "")) - require.Equal(t, "unsupported_metric_redundant_test_per_C", normalizeName(createGauge("unsupported.metric.redundant", "__test $/°C"), "")) +func TestAllowUTF8(t *testing.T) { + t.Run("allow UTF8", func(t *testing.T) { + require.Equal(t, "unsupported.metric.temperature_°F", normalizeName(createGauge("unsupported.metric.temperature", "°F"), "", true)) + require.Equal(t, "unsupported.metric.weird_+=.:,!* & #", normalizeName(createGauge("unsupported.metric.weird", "+=.:,!* & #"), "", true)) + require.Equal(t, "unsupported.metric.redundant___test $_per_°C", normalizeName(createGauge("unsupported.metric.redundant", "__test $/°C"), "", true)) + require.Equal(t, "metric_with_字符_foreign_characters_ど", normalizeName(createGauge("metric_with_字符_foreign_characters", "ど"), "", true)) + }) + t.Run("disallow UTF8", func(t *testing.T) { + require.Equal(t, "unsupported_metric_temperature_F", normalizeName(createGauge("unsupported.metric.temperature", "°F"), "", false)) + require.Equal(t, "unsupported_metric_weird", normalizeName(createGauge("unsupported.metric.weird", "+=.:,!* & #"), "", false)) + require.Equal(t, "unsupported_metric_redundant_test_per_C", normalizeName(createGauge("unsupported.metric.redundant", "__test $/°C"), "", false)) + require.Equal(t, "metric_with_foreign_characters", normalizeName(createGauge("metric_with_字符_foreign_characters", "ど"), "", false)) + }) +} + +func TestAllowUTF8KnownBugs(t *testing.T) { + // Due to historical reasons, the translator code was copied from OpenTelemetry collector codebase. + // Over there, they tried to provide means to translate metric names following Prometheus conventions that are documented here: + // https://prometheus.io/docs/practices/naming/ + // + // Althogh not explicitly said, it was implied that words should be separated by a single underscore and the codebase was written + // with that in mind. + // + // Now that we're allowing OTel users to have their original names stored in prometheus without any transformation, we're facing problems + // where two (or more) UTF-8 characters are being used to separate words. + // TODO(arthursens): Fix it! + + // We're asserting on 'NotEqual', which proves the bug. + require.NotEqual(t, "metric....split_=+by_//utf8characters", normalizeName(createGauge("metric....split_=+by_//utf8characters", ""), "", true)) + // Here we're asserting on 'Equal', showing the current behavior. + require.Equal(t, "metric.split_by_utf8characters", normalizeName(createGauge("metric....split_=+by_//utf8characters", ""), "", true)) } func TestOTelReceivers(t *testing.T) { - require.Equal(t, "active_directory_ds_replication_network_io_bytes_total", normalizeName(createCounter("active_directory.ds.replication.network.io", "By"), "")) - require.Equal(t, "active_directory_ds_replication_sync_object_pending_total", normalizeName(createCounter("active_directory.ds.replication.sync.object.pending", "{objects}"), "")) - require.Equal(t, "active_directory_ds_replication_object_rate_per_second", normalizeName(createGauge("active_directory.ds.replication.object.rate", "{objects}/s"), "")) - require.Equal(t, "active_directory_ds_name_cache_hit_rate_percent", normalizeName(createGauge("active_directory.ds.name_cache.hit_rate", "%"), "")) - require.Equal(t, "active_directory_ds_ldap_bind_last_successful_time_milliseconds", normalizeName(createGauge("active_directory.ds.ldap.bind.last_successful.time", "ms"), "")) - require.Equal(t, "apache_current_connections", normalizeName(createGauge("apache.current_connections", "connections"), "")) - require.Equal(t, "apache_workers_connections", normalizeName(createGauge("apache.workers", "connections"), "")) - require.Equal(t, "apache_requests_total", normalizeName(createCounter("apache.requests", "1"), "")) - require.Equal(t, "bigip_virtual_server_request_count_total", normalizeName(createCounter("bigip.virtual_server.request.count", "{requests}"), "")) - require.Equal(t, "system_cpu_utilization_ratio", normalizeName(createGauge("system.cpu.utilization", "1"), "")) - require.Equal(t, "system_disk_operation_time_seconds_total", normalizeName(createCounter("system.disk.operation_time", "s"), "")) - require.Equal(t, "system_cpu_load_average_15m_ratio", normalizeName(createGauge("system.cpu.load_average.15m", "1"), "")) - require.Equal(t, "memcached_operation_hit_ratio_percent", normalizeName(createGauge("memcached.operation_hit_ratio", "%"), "")) - require.Equal(t, "mongodbatlas_process_asserts_per_second", normalizeName(createGauge("mongodbatlas.process.asserts", "{assertions}/s"), "")) - require.Equal(t, "mongodbatlas_process_journaling_data_files_mebibytes", normalizeName(createGauge("mongodbatlas.process.journaling.data_files", "MiBy"), "")) - require.Equal(t, "mongodbatlas_process_network_io_bytes_per_second", normalizeName(createGauge("mongodbatlas.process.network.io", "By/s"), "")) - require.Equal(t, "mongodbatlas_process_oplog_rate_gibibytes_per_hour", normalizeName(createGauge("mongodbatlas.process.oplog.rate", "GiBy/h"), "")) - require.Equal(t, "mongodbatlas_process_db_query_targeting_scanned_per_returned", normalizeName(createGauge("mongodbatlas.process.db.query_targeting.scanned_per_returned", "{scanned}/{returned}"), "")) - require.Equal(t, "nginx_requests", normalizeName(createGauge("nginx.requests", "requests"), "")) - require.Equal(t, "nginx_connections_accepted", normalizeName(createGauge("nginx.connections_accepted", "connections"), "")) - require.Equal(t, "nsxt_node_memory_usage_kilobytes", normalizeName(createGauge("nsxt.node.memory.usage", "KBy"), "")) - require.Equal(t, "redis_latest_fork_microseconds", normalizeName(createGauge("redis.latest_fork", "us"), "")) + require.Equal(t, "active_directory_ds_replication_network_io_bytes_total", normalizeName(createCounter("active_directory.ds.replication.network.io", "By"), "", false)) + require.Equal(t, "active_directory_ds_replication_sync_object_pending_total", normalizeName(createCounter("active_directory.ds.replication.sync.object.pending", "{objects}"), "", false)) + require.Equal(t, "active_directory_ds_replication_object_rate_per_second", normalizeName(createGauge("active_directory.ds.replication.object.rate", "{objects}/s"), "", false)) + require.Equal(t, "active_directory_ds_name_cache_hit_rate_percent", normalizeName(createGauge("active_directory.ds.name_cache.hit_rate", "%"), "", false)) + require.Equal(t, "active_directory_ds_ldap_bind_last_successful_time_milliseconds", normalizeName(createGauge("active_directory.ds.ldap.bind.last_successful.time", "ms"), "", false)) + require.Equal(t, "apache_current_connections", normalizeName(createGauge("apache.current_connections", "connections"), "", false)) + require.Equal(t, "apache_workers_connections", normalizeName(createGauge("apache.workers", "connections"), "", false)) + require.Equal(t, "apache_requests_total", normalizeName(createCounter("apache.requests", "1"), "", false)) + require.Equal(t, "bigip_virtual_server_request_count_total", normalizeName(createCounter("bigip.virtual_server.request.count", "{requests}"), "", false)) + require.Equal(t, "system_cpu_utilization_ratio", normalizeName(createGauge("system.cpu.utilization", "1"), "", false)) + require.Equal(t, "system_disk_operation_time_seconds_total", normalizeName(createCounter("system.disk.operation_time", "s"), "", false)) + require.Equal(t, "system_cpu_load_average_15m_ratio", normalizeName(createGauge("system.cpu.load_average.15m", "1"), "", false)) + require.Equal(t, "memcached_operation_hit_ratio_percent", normalizeName(createGauge("memcached.operation_hit_ratio", "%"), "", false)) + require.Equal(t, "mongodbatlas_process_asserts_per_second", normalizeName(createGauge("mongodbatlas.process.asserts", "{assertions}/s"), "", false)) + require.Equal(t, "mongodbatlas_process_journaling_data_files_mebibytes", normalizeName(createGauge("mongodbatlas.process.journaling.data_files", "MiBy"), "", false)) + require.Equal(t, "mongodbatlas_process_network_io_bytes_per_second", normalizeName(createGauge("mongodbatlas.process.network.io", "By/s"), "", false)) + require.Equal(t, "mongodbatlas_process_oplog_rate_gibibytes_per_hour", normalizeName(createGauge("mongodbatlas.process.oplog.rate", "GiBy/h"), "", false)) + require.Equal(t, "mongodbatlas_process_db_query_targeting_scanned_per_returned", normalizeName(createGauge("mongodbatlas.process.db.query_targeting.scanned_per_returned", "{scanned}/{returned}"), "", false)) + require.Equal(t, "nginx_requests", normalizeName(createGauge("nginx.requests", "requests"), "", false)) + require.Equal(t, "nginx_connections_accepted", normalizeName(createGauge("nginx.connections_accepted", "connections"), "", false)) + require.Equal(t, "nsxt_node_memory_usage_kilobytes", normalizeName(createGauge("nsxt.node.memory.usage", "KBy"), "", false)) + require.Equal(t, "redis_latest_fork_microseconds", normalizeName(createGauge("redis.latest_fork", "us"), "", false)) } func TestTrimPromSuffixes(t *testing.T) { @@ -144,17 +171,17 @@ func TestTrimPromSuffixes(t *testing.T) { } func TestNamespace(t *testing.T) { - require.Equal(t, "space_test", normalizeName(createGauge("test", ""), "space")) - require.Equal(t, "space_test", normalizeName(createGauge("#test", ""), "space")) + require.Equal(t, "space_test", normalizeName(createGauge("test", ""), "space", false)) + require.Equal(t, "space_test", normalizeName(createGauge("#test", ""), "space", false)) } -func TestCleanUpString(t *testing.T) { - require.Equal(t, "", CleanUpString("")) - require.Equal(t, "a_b", CleanUpString("a b")) - require.Equal(t, "hello_world", CleanUpString("hello, world!")) - require.Equal(t, "hello_you_2", CleanUpString("hello you 2")) - require.Equal(t, "1000", CleanUpString("$1000")) - require.Equal(t, "", CleanUpString("*+$^=)")) +func TestCleanUpUnit(t *testing.T) { + require.Equal(t, "", cleanUpUnit("")) + require.Equal(t, "a_b", cleanUpUnit("a b")) + require.Equal(t, "hello_world", cleanUpUnit("hello, world")) + require.Equal(t, "hello_you_2", cleanUpUnit("hello you 2")) + require.Equal(t, "1000", cleanUpUnit("$1000")) + require.Equal(t, "", cleanUpUnit("*+$^=)")) } func TestUnitMapGetOrDefault(t *testing.T) { @@ -179,27 +206,29 @@ func TestRemoveItem(t *testing.T) { require.Equal(t, []string{"b", "c"}, removeItem([]string{"a", "b", "c"}, "a")) } -func TestBuildCompliantNameWithNormalize(t *testing.T) { - require.Equal(t, "system_io_bytes_total", BuildCompliantName(createCounter("system.io", "By"), "", true)) - require.Equal(t, "system_network_io_bytes_total", BuildCompliantName(createCounter("network.io", "By"), "system", true)) - require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", ""), "", true)) - require.Equal(t, "envoy_rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", true)) - require.Equal(t, "foo_bar", BuildCompliantName(createGauge(":foo::bar", ""), "", true)) - require.Equal(t, "foo_bar_total", BuildCompliantName(createCounter(":foo::bar", ""), "", true)) +func TestBuildCompliantNameWithSuffixes(t *testing.T) { + require.Equal(t, "system_io_bytes_total", BuildCompliantName(createCounter("system.io", "By"), "", true, false)) + require.Equal(t, "system_network_io_bytes_total", BuildCompliantName(createCounter("network.io", "By"), "system", true, false)) + require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", ""), "", true, false)) + require.Equal(t, "envoy_rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", true, false)) + require.Equal(t, ":foo::bar", BuildCompliantName(createGauge(":foo::bar", ""), "", true, false)) + require.Equal(t, ":foo::bar_total", BuildCompliantName(createCounter(":foo::bar", ""), "", true, false)) // Gauges with unit 1 are considered ratios. - require.Equal(t, "foo_bar_ratio", BuildCompliantName(createGauge("foo.bar", "1"), "", true)) + require.Equal(t, "foo_bar_ratio", BuildCompliantName(createGauge("foo.bar", "1"), "", true, false)) // Slashes in units are converted. - require.Equal(t, "system_io_foo_per_bar_total", BuildCompliantName(createCounter("system.io", "foo/bar"), "", true)) + require.Equal(t, "system_io_foo_per_bar_total", BuildCompliantName(createCounter("system.io", "foo/bar"), "", true, false)) + require.Equal(t, "metric_with_foreign_characters_total", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", true, false)) } func TestBuildCompliantNameWithoutSuffixes(t *testing.T) { - require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "By"), "", false)) - require.Equal(t, "system_network_io", BuildCompliantName(createCounter("network.io", "By"), "system", false)) - require.Equal(t, "system_network_I_O", BuildCompliantName(createCounter("network (I/O)", "By"), "system", false)) - require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", "By"), "", false)) - require.Equal(t, "envoy__rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", false)) - require.Equal(t, ":foo::bar", BuildCompliantName(createGauge(":foo::bar", ""), "", false)) - require.Equal(t, ":foo::bar", BuildCompliantName(createCounter(":foo::bar", ""), "", false)) - require.Equal(t, "foo_bar", BuildCompliantName(createGauge("foo.bar", "1"), "", false)) - require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "foo/bar"), "", false)) + require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "By"), "", false, false)) + require.Equal(t, "system_network_io", BuildCompliantName(createCounter("network.io", "By"), "system", false, false)) + require.Equal(t, "system_network_I_O", BuildCompliantName(createCounter("network (I/O)", "By"), "system", false, false)) + require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", "By"), "", false, false)) + require.Equal(t, "envoy__rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", false, false)) + require.Equal(t, ":foo::bar", BuildCompliantName(createGauge(":foo::bar", ""), "", false, false)) + require.Equal(t, ":foo::bar", BuildCompliantName(createCounter(":foo::bar", ""), "", false, false)) + require.Equal(t, "foo_bar", BuildCompliantName(createGauge("foo.bar", "1"), "", false, false)) + require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "foo/bar"), "", false, false)) + require.Equal(t, "metric_with___foreign_characters", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", false, false)) } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index fd7f58f07..30cfa8643 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -157,7 +157,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, setting // map ensures no duplicate label names. l := make(map[string]string, maxLabelCount) for _, label := range labels { - var finalKey = prometheustranslator.NormalizeLabel(label.Name) + var finalKey = prometheustranslator.NormalizeLabel(label.Name, settings.AllowUTF8) if existingValue, alreadyExists := l[finalKey]; alreadyExists { l[finalKey] = existingValue + ";" + label.Value } else { @@ -166,7 +166,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, setting } for _, lbl := range promotedAttrs { - normalized := prometheustranslator.NormalizeLabel(lbl.Name) + normalized := prometheustranslator.NormalizeLabel(lbl.Name, settings.AllowUTF8) if _, exists := l[normalized]; !exists { l[normalized] = lbl.Value } @@ -205,7 +205,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, setting } // internal labels should be maintained if !(len(name) > 4 && name[:2] == "__" && name[len(name)-2:] == "__") { - name = prometheustranslator.NormalizeLabel(name) + name = prometheustranslator.NormalizeLabel(name, settings.AllowUTF8) } l[name] = extras[i+1] } @@ -351,9 +351,17 @@ func getPromExemplars[T exemplarType](ctx context.Context, everyN *everyNTimes, exemplarRunes := 0 promExemplar := prompb.Exemplar{ - Value: exemplar.DoubleValue(), Timestamp: timestamp.FromTime(exemplar.Timestamp().AsTime()), } + switch exemplar.ValueType() { + case pmetric.ExemplarValueTypeInt: + promExemplar.Value = float64(exemplar.IntValue()) + case pmetric.ExemplarValueTypeDouble: + promExemplar.Value = exemplar.DoubleValue() + default: + return nil, fmt.Errorf("unsupported exemplar value type: %v", exemplar.ValueType()) + } + if traceID := exemplar.TraceID(); !traceID.IsEmpty() { val := hex.EncodeToString(traceID[:]) exemplarRunes += utf8.RuneCountInString(traceIDKey) + utf8.RuneCountInString(val) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go index a48a57b06..b22282097 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go @@ -48,7 +48,6 @@ func TestCreateAttributes(t *testing.T) { resource.Attributes().PutStr(k, v) } attrs := pcommon.NewMap() - attrs.PutStr("__name__", "test_metric") attrs.PutStr("metric-attr", "metric value") testCases := []struct { @@ -162,7 +161,7 @@ func TestCreateAttributes(t *testing.T) { settings := Settings{ PromoteResourceAttributes: tc.promoteResourceAttributes, } - lbls := createAttributes(resource, attrs, settings, nil, false) + lbls := createAttributes(resource, attrs, settings, nil, false, model.MetricNameLabel, "test_metric") assert.ElementsMatch(t, lbls, tc.expectedLabels) }) @@ -406,3 +405,38 @@ func TestPrometheusConverter_AddHistogramDataPoints(t *testing.T) { }) } } + +func TestGetPromExemplars(t *testing.T) { + ctx := context.Background() + everyN := &everyNTimes{n: 1} + + t.Run("Exemplars with int value", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + exemplar.SetIntValue(42) + exemplars, err := getPromExemplars(ctx, everyN, pt) + assert.NoError(t, err) + assert.Len(t, exemplars, 1) + assert.Equal(t, float64(42), exemplars[0].Value) + }) + + t.Run("Exemplars with double value", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + exemplar.SetDoubleValue(69.420) + exemplars, err := getPromExemplars(ctx, everyN, pt) + assert.NoError(t, err) + assert.Len(t, exemplars, 1) + assert.Equal(t, 69.420, exemplars[0].Value) + }) + + t.Run("Exemplars with unsupported value type", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + _, err := getPromExemplars(ctx, everyN, pt) + assert.Error(t, err) + }) +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go index 5fdd26ef2..dcd83b7f9 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go @@ -762,7 +762,7 @@ func TestPrometheusConverter_addExponentialHistogramDataPoints(t *testing.T) { Settings{ ExportCreatedMetric: true, }, - prometheustranslator.BuildCompliantName(metric, "", true), + prometheustranslator.BuildCompliantName(metric, "", true, true), ) require.NoError(t, err) require.Empty(t, annots) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index 0afd2ad57..4f8baf310 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -38,6 +38,7 @@ type Settings struct { ExportCreatedMetric bool AddMetricSuffixes bool SendMetadata bool + AllowUTF8 bool PromoteResourceAttributes []string } @@ -84,7 +85,7 @@ func (c *PrometheusConverter) FromMetrics(ctx context.Context, md pmetric.Metric continue } - promName := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes) + promName := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes, settings.AllowUTF8) // handle individual metrics based on type //exhaustive:enforce diff --git a/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go b/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go index ba4870419..b423d2cc6 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go @@ -43,7 +43,7 @@ func otelMetricTypeToPromMetricType(otelMetric pmetric.Metric) prompb.MetricMeta return prompb.MetricMetadata_UNKNOWN } -func OtelMetricsToMetadata(md pmetric.Metrics, addMetricSuffixes bool) []*prompb.MetricMetadata { +func OtelMetricsToMetadata(md pmetric.Metrics, addMetricSuffixes, allowUTF8 bool) []*prompb.MetricMetadata { resourceMetricsSlice := md.ResourceMetrics() metadataLength := 0 @@ -65,7 +65,7 @@ func OtelMetricsToMetadata(md pmetric.Metrics, addMetricSuffixes bool) []*prompb metric := scopeMetrics.Metrics().At(k) entry := prompb.MetricMetadata{ Type: otelMetricTypeToPromMetricType(metric), - MetricFamilyName: prometheustranslator.BuildCompliantName(metric, "", addMetricSuffixes), + MetricFamilyName: prometheustranslator.BuildCompliantName(metric, "", addMetricSuffixes, allowUTF8), Help: metric.Description(), } metadata = append(metadata, &entry) diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index 4b7c5a4e9..21fdf92e3 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -763,7 +763,7 @@ func TestDisableReshardOnRetry(t *testing.T) { onStoreCalled() return WriteResponseStats{}, RecoverableError{ - error: fmt.Errorf("fake error"), + error: errors.New("fake error"), retryAfter: model.Duration(retryAfter), } }, diff --git a/storage/remote/write.go b/storage/remote/write.go index 20e4ed10d..639f34452 100644 --- a/storage/remote/write.go +++ b/storage/remote/write.go @@ -278,6 +278,7 @@ func (rws *WriteStorage) Close() error { type timestampTracker struct { writeStorage *WriteStorage + appendOptions *storage.AppendOptions samples int64 exemplars int64 histograms int64 @@ -285,6 +286,10 @@ type timestampTracker struct { highestRecvTimestamp *maxTimestamp } +func (t *timestampTracker) SetOptions(opts *storage.AppendOptions) { + t.appendOptions = opts +} + // Append implements storage.Appender. func (t *timestampTracker) Append(_ storage.SeriesRef, _ labels.Labels, ts int64, _ float64) (storage.SeriesRef, error) { t.samples++ @@ -307,8 +312,23 @@ func (t *timestampTracker) AppendHistogram(_ storage.SeriesRef, _ labels.Labels, return 0, nil } -func (t *timestampTracker) AppendHistogramCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, _ int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) { - // TODO: Implement +func (t *timestampTracker) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, ct int64) (storage.SeriesRef, error) { + t.samples++ + if ct > t.highestTimestamp { + // Theoretically, we should never see a CT zero sample with a timestamp higher than the highest timestamp we've seen so far. + // However, we're not going to enforce that here, as it is not the responsibility of the tracker to enforce this. + t.highestTimestamp = ct + } + return 0, nil +} + +func (t *timestampTracker) AppendHistogramCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, ct int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) { + t.histograms++ + if ct > t.highestTimestamp { + // Theoretically, we should never see a CT zero sample with a timestamp higher than the highest timestamp we've seen so far. + // However, we're not going to enforce that here, as it is not the responsibility of the tracker to enforce this. + t.highestTimestamp = ct + } return 0, nil } @@ -318,11 +338,6 @@ func (t *timestampTracker) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, return 0, nil } -func (t *timestampTracker) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, _ int64) (storage.SeriesRef, error) { - // AppendCTZeroSample is no-op for remote-write for now. - return 0, nil -} - // Commit implements storage.Appender. func (t *timestampTracker) Commit() error { t.writeStorage.samplesIn.incr(t.samples + t.exemplars + t.histograms) diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index 466673c99..87102a374 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -513,6 +513,7 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { converter := otlptranslator.NewPrometheusConverter() annots, err := converter.FromMetrics(r.Context(), req.Metrics(), otlptranslator.Settings{ AddMetricSuffixes: true, + AllowUTF8: otlpCfg.TranslationStrategy == config.NoUTF8EscapingWithSuffixes, PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes, }) if err != nil { diff --git a/storage/remote/write_handler_test.go b/storage/remote/write_handler_test.go index d91949131..c40f227ea 100644 --- a/storage/remote/write_handler_test.go +++ b/storage/remote/write_handler_test.go @@ -672,7 +672,7 @@ func TestCommitErr_V1Message(t *testing.T) { req, err := http.NewRequest("", "", bytes.NewReader(payload)) require.NoError(t, err) - appendable := &mockAppendable{commitErr: fmt.Errorf("commit error")} + appendable := &mockAppendable{commitErr: errors.New("commit error")} handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() @@ -696,7 +696,7 @@ func TestCommitErr_V2Message(t *testing.T) { req.Header.Set("Content-Encoding", string(SnappyBlockCompression)) req.Header.Set(RemoteWriteVersionHeader, RemoteWriteVersion20HeaderValue) - appendable := &mockAppendable{commitErr: fmt.Errorf("commit error")} + appendable := &mockAppendable{commitErr: errors.New("commit error")} handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) recorder := httptest.NewRecorder() @@ -833,6 +833,10 @@ func (m *mockAppendable) Appender(_ context.Context) storage.Appender { return m } +func (m *mockAppendable) SetOptions(opts *storage.AppendOptions) { + panic("unimplemented") +} + func (m *mockAppendable) Append(_ storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { if m.appendSampleErr != nil { return 0, m.appendSampleErr diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index b2c40b201..3863e6cd9 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -763,6 +763,7 @@ func (db *DB) Close() error { type appender struct { *DB + hints *storage.AppendOptions pendingSeries []record.RefSeries pendingSamples []record.RefSample @@ -783,6 +784,10 @@ type appender struct { floatHistogramSeries []*memSeries } +func (a *appender) SetOptions(opts *storage.AppendOptions) { + a.hints = opts +} + func (a *appender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { // series references and chunk references are identical for agent mode. headRef := chunks.HeadSeriesRef(ref) @@ -971,19 +976,139 @@ func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int return storage.SeriesRef(series.ref), nil } -func (a *appender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { - // TODO(bwplotka/arthursens): Wire metadata in the Agent's appender. - return 0, nil -} - func (a *appender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadata.Metadata) (storage.SeriesRef, error) { // TODO: Wire metadata in the Agent's appender. return 0, nil } -func (a *appender) AppendCTZeroSample(storage.SeriesRef, labels.Labels, int64, int64) (storage.SeriesRef, error) { - // TODO(bwplotka): Wire metadata in the Agent's appender. - return 0, nil +func (a *appender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if h != nil { + if err := h.Validate(); err != nil { + return 0, err + } + } + if fh != nil { + if err := fh.Validate(); err != nil { + return 0, err + } + } + if ct >= t { + return 0, storage.ErrCTNewerThanSample + } + + series := a.series.GetByID(chunks.HeadSeriesRef(ref)) + if series == nil { + // Ensure no empty labels have gotten through. + l = l.WithoutEmpty() + if l.IsEmpty() { + return 0, fmt.Errorf("empty labelset: %w", tsdb.ErrInvalidSample) + } + + if lbl, dup := l.HasDuplicateLabelNames(); dup { + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidSample) + } + + var created bool + series, created = a.getOrCreate(l) + if created { + a.pendingSeries = append(a.pendingSeries, record.RefSeries{ + Ref: series.ref, + Labels: l, + }) + a.metrics.numActiveSeries.Inc() + } + } + + series.Lock() + defer series.Unlock() + + if ct <= a.minValidTime(series.lastTs) { + return 0, storage.ErrOutOfOrderCT + } + + if ct > series.lastTs { + series.lastTs = ct + } else { + // discard the sample if it's out of order. + return 0, storage.ErrOutOfOrderCT + } + + switch { + case h != nil: + zeroHistogram := &histogram.Histogram{} + a.pendingHistograms = append(a.pendingHistograms, record.RefHistogramSample{ + Ref: series.ref, + T: ct, + H: zeroHistogram, + }) + a.histogramSeries = append(a.histogramSeries, series) + case fh != nil: + a.pendingFloatHistograms = append(a.pendingFloatHistograms, record.RefFloatHistogramSample{ + Ref: series.ref, + T: ct, + FH: &histogram.FloatHistogram{}, + }) + a.floatHistogramSeries = append(a.floatHistogramSeries, series) + } + + a.metrics.totalAppendedSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() + return storage.SeriesRef(series.ref), nil +} + +func (a *appender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) { + if ct >= t { + return 0, storage.ErrCTNewerThanSample + } + + series := a.series.GetByID(chunks.HeadSeriesRef(ref)) + if series == nil { + l = l.WithoutEmpty() + if l.IsEmpty() { + return 0, fmt.Errorf("empty labelset: %w", tsdb.ErrInvalidSample) + } + + if lbl, dup := l.HasDuplicateLabelNames(); dup { + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidSample) + } + + newSeries, created := a.getOrCreate(l) + if created { + a.pendingSeries = append(a.pendingSeries, record.RefSeries{ + Ref: newSeries.ref, + Labels: l, + }) + a.metrics.numActiveSeries.Inc() + } + + series = newSeries + } + + series.Lock() + defer series.Unlock() + + if t <= a.minValidTime(series.lastTs) { + a.metrics.totalOutOfOrderSamples.Inc() + return 0, storage.ErrOutOfOrderSample + } + + if ct > series.lastTs { + series.lastTs = ct + } else { + // discard the sample if it's out of order. + return 0, storage.ErrOutOfOrderCT + } + + // NOTE: always modify pendingSamples and sampleSeries together. + a.pendingSamples = append(a.pendingSamples, record.RefSample{ + Ref: series.ref, + T: ct, + V: 0, + }) + a.sampleSeries = append(a.sampleSeries, series) + + a.metrics.totalAppendedSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + + return storage.SeriesRef(series.ref), nil } // Commit submits the collected samples and purges the batch. diff --git a/tsdb/agent/db_test.go b/tsdb/agent/db_test.go index 4d5fda25d..b28c29095 100644 --- a/tsdb/agent/db_test.go +++ b/tsdb/agent/db_test.go @@ -15,7 +15,9 @@ package agent import ( "context" + "errors" "fmt" + "io" "math" "path/filepath" "strconv" @@ -29,6 +31,7 @@ import ( "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" @@ -933,6 +936,249 @@ func TestDBOutOfOrderTimeWindow(t *testing.T) { } } +type walSample struct { + t int64 + f float64 + h *histogram.Histogram + lbls labels.Labels + ref storage.SeriesRef +} + +func TestDBCreatedTimestampSamplesIngestion(t *testing.T) { + t.Parallel() + + type appendableSample struct { + t int64 + ct int64 + v float64 + lbls labels.Labels + h *histogram.Histogram + expectsError bool + } + + testHistogram := tsdbutil.GenerateTestHistograms(1)[0] + zeroHistogram := &histogram.Histogram{} + + lbls := labelsForTest(t.Name(), 1) + defLbls := labels.New(lbls[0]...) + + testCases := []struct { + name string + inputSamples []appendableSample + expectedSamples []*walSample + expectedSeriesCount int + }{ + { + name: "in order ct+normal sample/floatSamples", + inputSamples: []appendableSample{ + {t: 100, ct: 1, v: 10, lbls: defLbls}, + {t: 101, ct: 1, v: 10, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 1, f: 0, lbls: defLbls}, + {t: 100, f: 10, lbls: defLbls}, + {t: 101, f: 10, lbls: defLbls}, + }, + }, + { + name: "CT+float && CT+histogram samples", + inputSamples: []appendableSample{ + { + t: 100, + ct: 30, + v: 20, + lbls: defLbls, + }, + { + t: 300, + ct: 230, + h: testHistogram, + lbls: defLbls, + }, + }, + expectedSamples: []*walSample{ + {t: 30, f: 0, lbls: defLbls}, + {t: 100, f: 20, lbls: defLbls}, + {t: 230, h: zeroHistogram, lbls: defLbls}, + {t: 300, h: testHistogram, lbls: defLbls}, + }, + expectedSeriesCount: 1, + }, + { + name: "CT+float && CT+histogram samples with error", + inputSamples: []appendableSample{ + { + // invalid CT + t: 100, + ct: 100, + v: 10, + lbls: defLbls, + expectsError: true, + }, + { + // invalid CT histogram + t: 300, + ct: 300, + h: testHistogram, + lbls: defLbls, + expectsError: true, + }, + }, + expectedSamples: []*walSample{ + {t: 100, f: 10, lbls: defLbls}, + {t: 300, h: testHistogram, lbls: defLbls}, + }, + expectedSeriesCount: 0, + }, + { + name: "In order ct+normal sample/histogram", + inputSamples: []appendableSample{ + {t: 100, h: testHistogram, ct: 1, lbls: defLbls}, + {t: 101, h: testHistogram, ct: 1, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 1, h: &histogram.Histogram{}}, + {t: 100, h: testHistogram}, + {t: 101, h: &histogram.Histogram{CounterResetHint: histogram.NotCounterReset}}, + }, + }, + { + name: "ct+normal then OOO sample/float", + inputSamples: []appendableSample{ + {t: 60_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 120_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 180_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 50_000, ct: 40_000, v: 10, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 40_000, f: 0, lbls: defLbls}, + {t: 50_000, f: 10, lbls: defLbls}, + {t: 60_000, f: 10, lbls: defLbls}, + {t: 120_000, f: 10, lbls: defLbls}, + {t: 180_000, f: 10, lbls: defLbls}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + reg := prometheus.NewRegistry() + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 360_000 + s := createTestAgentDB(t, reg, opts) + app := s.Appender(context.TODO()) + + for _, sample := range tc.inputSamples { + // We supposed to write a Histogram to the WAL + if sample.h != nil { + _, err := app.AppendHistogramCTZeroSample(0, sample.lbls, sample.t, sample.ct, zeroHistogram, nil) + if !errors.Is(err, storage.ErrOutOfOrderCT) { + require.Equal(t, sample.expectsError, err != nil, "expected error: %v, got: %v", sample.expectsError, err) + } + + _, err = app.AppendHistogram(0, sample.lbls, sample.t, sample.h, nil) + require.NoError(t, err) + } else { + // We supposed to write a float sample to the WAL + _, err := app.AppendCTZeroSample(0, sample.lbls, sample.t, sample.ct) + if !errors.Is(err, storage.ErrOutOfOrderCT) { + require.Equal(t, sample.expectsError, err != nil, "expected error: %v, got: %v", sample.expectsError, err) + } + + _, err = app.Append(0, sample.lbls, sample.t, sample.v) + require.NoError(t, err) + } + } + + require.NoError(t, app.Commit()) + // Close the DB to ensure all data is flushed to the WAL + require.NoError(t, s.Close()) + + // Check that we dont have any OOO samples in the WAL by checking metrics + families, err := reg.Gather() + require.NoError(t, err, "failed to gather metrics") + for _, f := range families { + if f.GetName() == "prometheus_agent_out_of_order_samples_total" { + t.Fatalf("unexpected metric %s", f.GetName()) + } + } + + outputSamples := readWALSamples(t, s.wal.Dir()) + + require.Equal(t, len(tc.expectedSamples), len(outputSamples), "Expected %d samples", len(tc.expectedSamples)) + + for i, expectedSample := range tc.expectedSamples { + for _, sample := range outputSamples { + if sample.t == expectedSample.t && sample.lbls.String() == expectedSample.lbls.String() { + if expectedSample.h != nil { + require.Equal(t, expectedSample.h, sample.h, "histogram value mismatch (sample index %d)", i) + } else { + require.Equal(t, expectedSample.f, sample.f, "value mismatch (sample index %d)", i) + } + } + } + } + }) + } +} + +func readWALSamples(t *testing.T, walDir string) []*walSample { + t.Helper() + sr, err := wlog.NewSegmentsReader(walDir) + require.NoError(t, err) + defer func(sr io.ReadCloser) { + err := sr.Close() + require.NoError(t, err) + }(sr) + + r := wlog.NewReader(sr) + dec := record.NewDecoder(labels.NewSymbolTable()) + + var ( + samples []record.RefSample + histograms []record.RefHistogramSample + + lastSeries record.RefSeries + outputSamples = make([]*walSample, 0) + ) + + for r.Next() { + rec := r.Record() + switch dec.Type(rec) { + case record.Series: + series, err := dec.Series(rec, nil) + require.NoError(t, err) + lastSeries = series[0] + case record.Samples: + samples, err = dec.Samples(rec, samples[:0]) + require.NoError(t, err) + for _, s := range samples { + outputSamples = append(outputSamples, &walSample{ + t: s.T, + f: s.V, + lbls: lastSeries.Labels.Copy(), + ref: storage.SeriesRef(lastSeries.Ref), + }) + } + case record.HistogramSamples: + histograms, err = dec.HistogramSamples(rec, histograms[:0]) + require.NoError(t, err) + for _, h := range histograms { + outputSamples = append(outputSamples, &walSample{ + t: h.T, + h: h.H, + lbls: lastSeries.Labels.Copy(), + ref: storage.SeriesRef(lastSeries.Ref), + }) + } + } + } + + return outputSamples +} + func BenchmarkCreateSeries(b *testing.B) { s := createTestAgentDB(b, nil, DefaultOptions()) defer s.Close() diff --git a/tsdb/block.go b/tsdb/block.go index 48ba4588a..aec99788c 100644 --- a/tsdb/block.go +++ b/tsdb/block.go @@ -330,7 +330,7 @@ type Block struct { // OpenBlock opens the block in the directory. It can be passed a chunk pool, which is used // to instantiate chunk structs. -func OpenBlock(logger *slog.Logger, dir string, pool chunkenc.Pool) (pb *Block, err error) { +func OpenBlock(logger *slog.Logger, dir string, pool chunkenc.Pool, postingsDecoderFactory PostingsDecoderFactory) (pb *Block, err error) { if logger == nil { logger = promslog.NewNopLogger() } @@ -351,7 +351,11 @@ func OpenBlock(logger *slog.Logger, dir string, pool chunkenc.Pool) (pb *Block, } closers = append(closers, cr) - ir, err := index.NewFileReader(filepath.Join(dir, indexFilename)) + decoder := index.DecodePostingsRaw + if postingsDecoderFactory != nil { + decoder = postingsDecoderFactory(meta) + } + ir, err := index.NewFileReader(filepath.Join(dir, indexFilename), decoder) if err != nil { return nil, err } diff --git a/tsdb/block_test.go b/tsdb/block_test.go index 3589b42c1..3e25cff3d 100644 --- a/tsdb/block_test.go +++ b/tsdb/block_test.go @@ -59,14 +59,14 @@ func TestSetCompactionFailed(t *testing.T) { tmpdir := t.TempDir() blockDir := createBlock(t, tmpdir, genSeries(1, 1, 0, 1)) - b, err := OpenBlock(nil, blockDir, nil) + b, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) require.False(t, b.meta.Compaction.Failed) require.NoError(t, b.setCompactionFailed()) require.True(t, b.meta.Compaction.Failed) require.NoError(t, b.Close()) - b, err = OpenBlock(nil, blockDir, nil) + b, err = OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) require.True(t, b.meta.Compaction.Failed) require.NoError(t, b.Close()) @@ -74,7 +74,7 @@ func TestSetCompactionFailed(t *testing.T) { func TestCreateBlock(t *testing.T) { tmpdir := t.TempDir() - b, err := OpenBlock(nil, createBlock(t, tmpdir, genSeries(1, 1, 0, 10)), nil) + b, err := OpenBlock(nil, createBlock(t, tmpdir, genSeries(1, 1, 0, 10)), nil, nil) require.NoError(t, err) require.NoError(t, b.Close()) } @@ -84,7 +84,7 @@ func BenchmarkOpenBlock(b *testing.B) { blockDir := createBlock(b, tmpdir, genSeries(1e6, 20, 0, 10)) b.Run("benchmark", func(b *testing.B) { for i := 0; i < b.N; i++ { - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(b, err) require.NoError(b, block.Close()) } @@ -190,7 +190,7 @@ func TestCorruptedChunk(t *testing.T) { require.NoError(t, f.Close()) // Check open err. - b, err := OpenBlock(nil, blockDir, nil) + b, err := OpenBlock(nil, blockDir, nil, nil) if tc.openErr != nil { require.EqualError(t, err, tc.openErr.Error()) return @@ -245,7 +245,7 @@ func TestLabelValuesWithMatchers(t *testing.T) { require.NotEmpty(t, files, "No chunk created.") // Check open err. - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) defer func() { require.NoError(t, block.Close()) }() @@ -325,7 +325,7 @@ func TestBlockQuerierReturnsSortedLabelValues(t *testing.T) { blockDir := createBlock(t, tmpdir, seriesEntries) // Check open err. - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, block.Close()) }) @@ -352,7 +352,7 @@ func TestBlockSize(t *testing.T) { // Create a block and compare the reported size vs actual disk size. { blockDirInit = createBlock(t, tmpdir, genSeries(10, 1, 1, 100)) - blockInit, err = OpenBlock(nil, blockDirInit, nil) + blockInit, err = OpenBlock(nil, blockDirInit, nil, nil) require.NoError(t, err) defer func() { require.NoError(t, blockInit.Close()) @@ -377,7 +377,7 @@ func TestBlockSize(t *testing.T) { blockDirsAfterCompact, err := c.Compact(tmpdir, []string{blockInit.Dir()}, nil) require.NoError(t, err) require.Len(t, blockDirsAfterCompact, 1) - blockAfterCompact, err := OpenBlock(nil, filepath.Join(tmpdir, blockDirsAfterCompact[0].String()), nil) + blockAfterCompact, err := OpenBlock(nil, filepath.Join(tmpdir, blockDirsAfterCompact[0].String()), nil, nil) require.NoError(t, err) defer func() { require.NoError(t, blockAfterCompact.Close()) @@ -408,7 +408,7 @@ func TestReadIndexFormatV1(t *testing.T) { */ blockDir := filepath.Join("testdata", "index_format_v1") - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) q, err := NewBlockQuerier(block, 0, 1000) @@ -445,7 +445,7 @@ func BenchmarkLabelValuesWithMatchers(b *testing.B) { require.NotEmpty(b, files, "No chunk created.") // Check open err. - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(b, err) defer func() { require.NoError(b, block.Close()) }() @@ -497,7 +497,7 @@ func TestLabelNamesWithMatchers(t *testing.T) { require.NotEmpty(t, files, "No chunk created.") // Check open err. - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, block.Close()) }) @@ -551,7 +551,7 @@ func TestBlockIndexReader_PostingsForLabelMatching(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, files, "No chunk created.") - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, block.Close()) }) diff --git a/tsdb/blockwriter_test.go b/tsdb/blockwriter_test.go index 4ec25df70..2704b5356 100644 --- a/tsdb/blockwriter_test.go +++ b/tsdb/blockwriter_test.go @@ -47,7 +47,7 @@ func TestBlockWriter(t *testing.T) { // Confirm the block has the correct data. blockpath := filepath.Join(outputDir, id.String()) - b, err := OpenBlock(nil, blockpath, nil) + b, err := OpenBlock(nil, blockpath, nil, nil) require.NoError(t, err) defer func() { require.NoError(t, b.Close()) }() q, err := NewBlockQuerier(b, math.MinInt64, math.MaxInt64) diff --git a/tsdb/chunkenc/chunk_test.go b/tsdb/chunkenc/chunk_test.go index e6b89be40..a5e75ca32 100644 --- a/tsdb/chunkenc/chunk_test.go +++ b/tsdb/chunkenc/chunk_test.go @@ -132,7 +132,7 @@ func TestPool(t *testing.T) { { name: "invalid encoding", encoding: EncNone, - expErr: fmt.Errorf(`invalid chunk encoding "none"`), + expErr: errors.New(`invalid chunk encoding "none"`), }, } { t.Run(tc.name, func(t *testing.T) { diff --git a/tsdb/chunkenc/float_histogram.go b/tsdb/chunkenc/float_histogram.go index f18eb77da..1c7e2c3ac 100644 --- a/tsdb/chunkenc/float_histogram.go +++ b/tsdb/chunkenc/float_histogram.go @@ -15,6 +15,7 @@ package chunkenc import ( "encoding/binary" + "errors" "fmt" "math" @@ -761,9 +762,9 @@ func (a *FloatHistogramAppender) AppendFloatHistogram(prev *FloatHistogramAppend if !okToAppend || counterReset { if appendOnly { if counterReset { - return nil, false, a, fmt.Errorf("float histogram counter reset") + return nil, false, a, errors.New("float histogram counter reset") } - return nil, false, a, fmt.Errorf("float histogram schema change") + return nil, false, a, errors.New("float histogram schema change") } newChunk := NewFloatHistogramChunk() app, err := newChunk.Appender() @@ -812,7 +813,7 @@ func (a *FloatHistogramAppender) AppendFloatHistogram(prev *FloatHistogramAppend pForwardInserts, nForwardInserts, pBackwardInserts, nBackwardInserts, pMergedSpans, nMergedSpans, okToAppend := a.appendableGauge(h) if !okToAppend { if appendOnly { - return nil, false, a, fmt.Errorf("float gauge histogram schema change") + return nil, false, a, errors.New("float gauge histogram schema change") } newChunk := NewFloatHistogramChunk() app, err := newChunk.Appender() diff --git a/tsdb/chunkenc/histogram.go b/tsdb/chunkenc/histogram.go index f8796d64e..bb747e135 100644 --- a/tsdb/chunkenc/histogram.go +++ b/tsdb/chunkenc/histogram.go @@ -15,6 +15,7 @@ package chunkenc import ( "encoding/binary" + "errors" "fmt" "math" @@ -795,9 +796,9 @@ func (a *HistogramAppender) AppendHistogram(prev *HistogramAppender, t int64, h if !okToAppend || counterReset { if appendOnly { if counterReset { - return nil, false, a, fmt.Errorf("histogram counter reset") + return nil, false, a, errors.New("histogram counter reset") } - return nil, false, a, fmt.Errorf("histogram schema change") + return nil, false, a, errors.New("histogram schema change") } newChunk := NewHistogramChunk() app, err := newChunk.Appender() @@ -846,7 +847,7 @@ func (a *HistogramAppender) AppendHistogram(prev *HistogramAppender, t int64, h pForwardInserts, nForwardInserts, pBackwardInserts, nBackwardInserts, pMergedSpans, nMergedSpans, okToAppend := a.appendableGauge(h) if !okToAppend { if appendOnly { - return nil, false, a, fmt.Errorf("gauge histogram schema change") + return nil, false, a, errors.New("gauge histogram schema change") } newChunk := NewHistogramChunk() app, err := newChunk.Appender() diff --git a/tsdb/chunks/chunks.go b/tsdb/chunks/chunks.go index ec0f6d403..f505d762b 100644 --- a/tsdb/chunks/chunks.go +++ b/tsdb/chunks/chunks.go @@ -16,6 +16,7 @@ package chunks import ( "bufio" "encoding/binary" + "errors" "fmt" "hash" "hash/crc32" @@ -172,7 +173,7 @@ func ChunkFromSamplesGeneric(s Samples) (Meta, error) { return emptyChunk, err } if newChunk != nil { - return emptyChunk, fmt.Errorf("did not expect to start a second chunk") + return emptyChunk, errors.New("did not expect to start a second chunk") } case chunkenc.ValFloatHistogram: newChunk, _, ca, err = ca.AppendFloatHistogram(nil, s.Get(i).T(), s.Get(i).FH(), false) @@ -180,7 +181,7 @@ func ChunkFromSamplesGeneric(s Samples) (Meta, error) { return emptyChunk, err } if newChunk != nil { - return emptyChunk, fmt.Errorf("did not expect to start a second chunk") + return emptyChunk, errors.New("did not expect to start a second chunk") } default: panic(fmt.Sprintf("unknown sample type %s", sampleType.String())) @@ -250,7 +251,7 @@ func (cm *Meta) OverlapsClosedInterval(mint, maxt int64) bool { return cm.MinTime <= maxt && mint <= cm.MaxTime } -var errInvalidSize = fmt.Errorf("invalid size") +var errInvalidSize = errors.New("invalid size") var castagnoliTable *crc32.Table diff --git a/tsdb/compact.go b/tsdb/compact.go index ff35679e3..74fde4c78 100644 --- a/tsdb/compact.go +++ b/tsdb/compact.go @@ -87,6 +87,7 @@ type LeveledCompactor struct { maxBlockChunkSegmentSize int64 mergeFunc storage.VerticalChunkSeriesMergeFunc postingsEncoder index.PostingsEncoder + postingsDecoderFactory PostingsDecoderFactory enableOverlappingCompaction bool } @@ -158,6 +159,9 @@ type LeveledCompactorOptions struct { // PE specifies the postings encoder. It is called when compactor is writing out the postings for a label name/value pair during compaction. // If it is nil then the default encoder is used. At the moment that is the "raw" encoder. See index.EncodePostingsRaw for more. PE index.PostingsEncoder + // PD specifies the postings decoder factory to return different postings decoder based on BlockMeta. It is called when opening a block or opening the index file. + // If it is nil then a default decoder is used, compatible with Prometheus v2. + PD PostingsDecoderFactory // MaxBlockChunkSegmentSize is the max block chunk segment size. If it is 0 then the default chunks.DefaultChunkSegmentSize is used. MaxBlockChunkSegmentSize int64 // MergeFunc is used for merging series together in vertical compaction. By default storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge) is used. @@ -167,6 +171,12 @@ type LeveledCompactorOptions struct { EnableOverlappingCompaction bool } +type PostingsDecoderFactory func(meta *BlockMeta) index.PostingsDecoder + +func DefaultPostingsDecoderFactory(_ *BlockMeta) index.PostingsDecoder { + return index.DecodePostingsRaw +} + func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, maxBlockChunkSegmentSize int64, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { return NewLeveledCompactorWithOptions(ctx, r, l, ranges, pool, LeveledCompactorOptions{ MaxBlockChunkSegmentSize: maxBlockChunkSegmentSize, @@ -184,7 +194,7 @@ func NewLeveledCompactor(ctx context.Context, r prometheus.Registerer, l *slog.L func NewLeveledCompactorWithOptions(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts LeveledCompactorOptions) (*LeveledCompactor, error) { if len(ranges) == 0 { - return nil, fmt.Errorf("at least one range must be provided") + return nil, errors.New("at least one range must be provided") } if pool == nil { pool = chunkenc.NewPool() @@ -213,6 +223,7 @@ func NewLeveledCompactorWithOptions(ctx context.Context, r prometheus.Registerer maxBlockChunkSegmentSize: maxBlockChunkSegmentSize, mergeFunc: mergeFunc, postingsEncoder: pe, + postingsDecoderFactory: opts.PD, enableOverlappingCompaction: opts.EnableOverlappingCompaction, }, nil } @@ -477,7 +488,7 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string, if b == nil { var err error - b, err = OpenBlock(c.logger, d, c.chunkPool) + b, err = OpenBlock(c.logger, d, c.chunkPool, c.postingsDecoderFactory) if err != nil { return nil, err } diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index 5123d6e62..310fe8f25 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -1153,7 +1153,7 @@ func BenchmarkCompaction(b *testing.B) { blockDirs := make([]string, 0, len(c.ranges)) var blocks []*Block for _, r := range c.ranges { - block, err := OpenBlock(nil, createBlock(b, dir, genSeries(nSeries, 10, r[0], r[1])), nil) + block, err := OpenBlock(nil, createBlock(b, dir, genSeries(nSeries, 10, r[0], r[1])), nil, nil) require.NoError(b, err) blocks = append(blocks, block) defer func() { @@ -1549,7 +1549,7 @@ func TestHeadCompactionWithHistograms(t *testing.T) { require.Len(t, ids, 1) // Open the block and query it and check the histograms. - block, err := OpenBlock(nil, path.Join(head.opts.ChunkDirRoot, ids[0].String()), nil) + block, err := OpenBlock(nil, path.Join(head.opts.ChunkDirRoot, ids[0].String()), nil, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, block.Close()) @@ -1911,7 +1911,7 @@ func TestCompactEmptyResultBlockWithTombstone(t *testing.T) { ctx := context.Background() tmpdir := t.TempDir() blockDir := createBlock(t, tmpdir, genSeries(1, 1, 0, 10)) - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) // Write tombstone covering the whole block. err = block.Delete(ctx, 0, 10, labels.MustNewMatcher(labels.MatchEqual, defaultLabelName, "0")) diff --git a/tsdb/db.go b/tsdb/db.go index 997bad36c..2cfa4f363 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -52,6 +52,9 @@ const ( // DefaultBlockDuration in milliseconds. DefaultBlockDuration = int64(2 * time.Hour / time.Millisecond) + // DefaultCompactionDelayMaxPercent in percentage. + DefaultCompactionDelayMaxPercent = 10 + // Block dir suffixes to make deletion and creation operations atomic. // We decided to do suffixes instead of creating meta.json as last (or delete as first) one, // because in error case you still can recover meta.json from the block content within local TSDB dir. @@ -86,7 +89,9 @@ func DefaultOptions() *Options { EnableOverlappingCompaction: true, EnableSharding: false, EnableDelayedCompaction: false, + CompactionDelayMaxPercent: DefaultCompactionDelayMaxPercent, CompactionDelay: time.Duration(0), + PostingsDecoderFactory: DefaultPostingsDecoderFactory, } } @@ -204,6 +209,8 @@ type Options struct { // CompactionDelay delays the start time of auto compactions. // It can be increased by up to one minute if the DB does not commit too often. CompactionDelay time.Duration + // CompactionDelayMaxPercent is the upper limit for CompactionDelay, specified as a percentage of the head chunk range. + CompactionDelayMaxPercent int // NewCompactorFunc is a function that returns a TSDB compactor. NewCompactorFunc NewCompactorFunc @@ -213,6 +220,10 @@ type Options struct { // BlockChunkQuerierFunc is a function to return storage.ChunkQuerier from a BlockReader. BlockChunkQuerierFunc BlockChunkQuerierFunc + + // PostingsDecoderFactory allows users to customize postings decoders based on BlockMeta. + // By default, DefaultPostingsDecoderFactory will be used to create raw posting decoder. + PostingsDecoderFactory PostingsDecoderFactory } type NewCompactorFunc func(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) @@ -627,7 +638,7 @@ func (db *DBReadOnly) Blocks() ([]BlockReader, error) { return nil, ErrClosed default: } - loadable, corrupted, err := openBlocks(db.logger, db.dir, nil, nil) + loadable, corrupted, err := openBlocks(db.logger, db.dir, nil, nil, DefaultPostingsDecoderFactory) if err != nil { return nil, err } @@ -725,7 +736,7 @@ func (db *DBReadOnly) LastBlockID() (string, error) { } // Block returns a block reader by given block id. -func (db *DBReadOnly) Block(blockID string) (BlockReader, error) { +func (db *DBReadOnly) Block(blockID string, postingsDecoderFactory PostingsDecoderFactory) (BlockReader, error) { select { case <-db.closed: return nil, ErrClosed @@ -737,7 +748,7 @@ func (db *DBReadOnly) Block(blockID string) (BlockReader, error) { return nil, fmt.Errorf("invalid block ID %s", blockID) } - block, err := OpenBlock(db.logger, filepath.Join(db.dir, blockID), nil) + block, err := OpenBlock(db.logger, filepath.Join(db.dir, blockID), nil, postingsDecoderFactory) if err != nil { return nil, err } @@ -896,6 +907,7 @@ func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rn db.compactor, err = NewLeveledCompactorWithOptions(ctx, r, l, rngs, db.chunkPool, LeveledCompactorOptions{ MaxBlockChunkSegmentSize: opts.MaxBlockChunkSegmentSize, EnableOverlappingCompaction: opts.EnableOverlappingCompaction, + PD: opts.PostingsDecoderFactory, }) } if err != nil { @@ -1562,7 +1574,7 @@ func (db *DB) reloadBlocks() (err error) { db.mtx.Lock() defer db.mtx.Unlock() - loadable, corrupted, err := openBlocks(db.logger, db.dir, db.blocks, db.chunkPool) + loadable, corrupted, err := openBlocks(db.logger, db.dir, db.blocks, db.chunkPool, db.opts.PostingsDecoderFactory) if err != nil { return err } @@ -1657,7 +1669,7 @@ func (db *DB) reloadBlocks() (err error) { return nil } -func openBlocks(l *slog.Logger, dir string, loaded []*Block, chunkPool chunkenc.Pool) (blocks []*Block, corrupted map[ulid.ULID]error, err error) { +func openBlocks(l *slog.Logger, dir string, loaded []*Block, chunkPool chunkenc.Pool, postingsDecoderFactory PostingsDecoderFactory) (blocks []*Block, corrupted map[ulid.ULID]error, err error) { bDirs, err := blockDirs(dir) if err != nil { return nil, nil, fmt.Errorf("find blocks: %w", err) @@ -1674,7 +1686,7 @@ func openBlocks(l *slog.Logger, dir string, loaded []*Block, chunkPool chunkenc. // See if we already have the block in memory or open it otherwise. block, open := getBlock(loaded, meta.ULID) if !open { - block, err = OpenBlock(l, bDir, chunkPool) + block, err = OpenBlock(l, bDir, chunkPool, postingsDecoderFactory) if err != nil { corrupted[meta.ULID] = err continue @@ -1986,8 +1998,7 @@ func (db *DB) EnableCompactions() { } func (db *DB) generateCompactionDelay() time.Duration { - // Up to 10% of the head's chunkRange. - return time.Duration(rand.Int63n(db.head.chunkRange.Load()/10)) * time.Millisecond + return time.Duration(rand.Int63n(db.head.chunkRange.Load()*int64(db.opts.CompactionDelayMaxPercent)/100)) * time.Millisecond } // ForceHeadMMap is intended for use only in tests and benchmarks. @@ -1999,10 +2010,10 @@ func (db *DB) ForceHeadMMap() { // will create a new block containing all data that's currently in the memory buffer/WAL. func (db *DB) Snapshot(dir string, withHead bool) error { if dir == db.dir { - return fmt.Errorf("cannot snapshot into base directory") + return errors.New("cannot snapshot into base directory") } if _, err := ulid.ParseStrict(dir); err == nil { - return fmt.Errorf("dir must not be a valid ULID") + return errors.New("dir must not be a valid ULID") } db.cmtx.Lock() diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 8c216956d..f851f2b91 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -15,14 +15,18 @@ package tsdb import ( "bufio" + "bytes" "context" "encoding/binary" + "errors" "flag" "fmt" "hash/crc32" "log/slog" "math" "math/rand" + "net/http" + "net/http/httptest" "os" "path" "path/filepath" @@ -41,6 +45,12 @@ import ( "go.uber.org/atomic" "go.uber.org/goleak" + "github.com/prometheus/prometheus/prompb" + "github.com/prometheus/prometheus/storage/remote" + + "github.com/gogo/protobuf/proto" + "github.com/golang/snappy" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -1310,7 +1320,7 @@ func TestTombstoneCleanFail(t *testing.T) { totalBlocks := 2 for i := 0; i < totalBlocks; i++ { blockDir := createBlock(t, db.Dir(), genSeries(1, 1, int64(i), int64(i)+1)) - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) // Add some fake tombstones to trigger the compaction. tomb := tombstones.NewMemTombstones() @@ -1365,7 +1375,7 @@ func TestTombstoneCleanRetentionLimitsRace(t *testing.T) { // Generate some blocks with old mint (near epoch). for j := 0; j < totalBlocks; j++ { blockDir := createBlock(t, dbDir, genSeries(10, 1, int64(j), int64(j)+1)) - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) // Cover block with tombstones so it can be deleted with CleanTombstones() as well. tomb := tombstones.NewMemTombstones() @@ -1423,10 +1433,10 @@ func (*mockCompactorFailing) Plan(string) ([]string, error) { func (c *mockCompactorFailing) Write(dest string, _ BlockReader, _, _ int64, _ *BlockMeta) ([]ulid.ULID, error) { if len(c.blocks) >= c.max { - return []ulid.ULID{}, fmt.Errorf("the compactor already did the maximum allowed blocks so it is time to fail") + return []ulid.ULID{}, errors.New("the compactor already did the maximum allowed blocks so it is time to fail") } - block, err := OpenBlock(nil, createBlock(c.t, dest, genSeries(1, 1, 0, 1)), nil) + block, err := OpenBlock(nil, createBlock(c.t, dest, genSeries(1, 1, 0, 1)), nil, nil) require.NoError(c.t, err) require.NoError(c.t, block.Close()) // Close block as we won't be using anywhere. c.blocks = append(c.blocks, block) @@ -1450,7 +1460,7 @@ func (*mockCompactorFailing) Compact(string, []string, []*Block) ([]ulid.ULID, e } func (*mockCompactorFailing) CompactOOO(string, *OOOCompactionHead) (result []ulid.ULID, err error) { - return nil, fmt.Errorf("mock compaction failing CompactOOO") + return nil, errors.New("mock compaction failing CompactOOO") } func TestTimeRetention(t *testing.T) { @@ -2499,13 +2509,13 @@ func TestDBReadOnly(t *testing.T) { }) t.Run("block", func(t *testing.T) { blockID := expBlock.meta.ULID.String() - block, err := dbReadOnly.Block(blockID) + block, err := dbReadOnly.Block(blockID, nil) require.NoError(t, err) require.Equal(t, expBlock.Meta(), block.Meta(), "block meta mismatch") }) t.Run("invalid block ID", func(t *testing.T) { blockID := "01GTDVZZF52NSWB5SXQF0P2PGF" - _, err := dbReadOnly.Block(blockID) + _, err := dbReadOnly.Block(blockID, nil) require.Error(t, err) }) t.Run("last block ID", func(t *testing.T) { @@ -4748,7 +4758,7 @@ func TestMultipleEncodingsCommitOrder(t *testing.T) { seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")) require.Len(t, seriesSet, 1) gotSamples := seriesSet[series1.String()] - requireEqualSamples(t, series1.String(), expSamples, gotSamples, true) + requireEqualSamples(t, series1.String(), expSamples, gotSamples, requireEqualSamplesIgnoreCounterResets) // Verify chunks querier. chunkQuerier, err := db.ChunkQuerier(minT, maxT) @@ -4766,7 +4776,7 @@ func TestMultipleEncodingsCommitOrder(t *testing.T) { gotChunkSamples = append(gotChunkSamples, smpls...) require.NoError(t, it.Err()) } - requireEqualSamples(t, series1.String(), expSamples, gotChunkSamples, true) + requireEqualSamples(t, series1.String(), expSamples, gotChunkSamples, requireEqualSamplesIgnoreCounterResets) } var expSamples []chunks.Sample @@ -5695,16 +5705,33 @@ func testQuerierOOOQuery(t *testing.T, gotSamples := seriesSet[series1.String()] require.NotNil(t, gotSamples) require.Len(t, seriesSet, 1) - requireEqualSamples(t, series1.String(), expSamples, gotSamples, true) + requireEqualSamples(t, series1.String(), expSamples, gotSamples, requireEqualSamplesIgnoreCounterResets) requireEqualOOOSamples(t, oooSamples, db) }) } } func TestChunkQuerierOOOQuery(t *testing.T) { + nBucketHistogram := func(n int64) *histogram.Histogram { + h := &histogram.Histogram{ + Count: uint64(n), + Sum: float64(n), + } + if n == 0 { + h.PositiveSpans = []histogram.Span{} + h.PositiveBuckets = []int64{} + return h + } + h.PositiveSpans = []histogram.Span{{Offset: 0, Length: uint32(n)}} + h.PositiveBuckets = make([]int64, n) + h.PositiveBuckets[0] = 1 + return h + } + scenarios := map[string]struct { - appendFunc func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) - sampleFunc func(ts int64) chunks.Sample + appendFunc func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) + sampleFunc func(ts int64) chunks.Sample + checkInUseBucket bool }{ "float": { appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { @@ -5749,10 +5776,24 @@ func TestChunkQuerierOOOQuery(t *testing.T) { return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} }, }, + "integer histogram with recode": { + // Histograms have increasing number of buckets so their chunks are recoded. + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + n := ts / time.Minute.Milliseconds() + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, nBucketHistogram(n), nil) + }, + sampleFunc: func(ts int64) chunks.Sample { + n := ts / time.Minute.Milliseconds() + return sample{t: ts, h: nBucketHistogram(n)} + }, + // Only check in-use buckets for this scenario. + // Recoding adds empty buckets. + checkInUseBucket: true, + }, } for name, scenario := range scenarios { t.Run(name, func(t *testing.T) { - testChunkQuerierOOOQuery(t, scenario.appendFunc, scenario.sampleFunc) + testChunkQuerierOOOQuery(t, scenario.appendFunc, scenario.sampleFunc, scenario.checkInUseBucket) }) } } @@ -5760,6 +5801,7 @@ func TestChunkQuerierOOOQuery(t *testing.T) { func testChunkQuerierOOOQuery(t *testing.T, appendFunc func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error), sampleFunc func(ts int64) chunks.Sample, + checkInUseBuckets bool, ) { opts := DefaultOptions() opts.OutOfOrderCapMax = 30 @@ -5999,10 +6041,28 @@ func testChunkQuerierOOOQuery(t *testing.T, it := chunk.Chunk.Iterator(nil) smpls, err := storage.ExpandSamples(it, newSample) require.NoError(t, err) + + // Verify that no sample is outside the chunk's time range. + for i, s := range smpls { + switch i { + case 0: + require.Equal(t, chunk.MinTime, s.T(), "first sample %v not at chunk min time %v", s, chunk.MinTime) + case len(smpls) - 1: + require.Equal(t, chunk.MaxTime, s.T(), "last sample %v not at chunk max time %v", s, chunk.MaxTime) + default: + require.GreaterOrEqual(t, s.T(), chunk.MinTime, "sample %v before chunk min time %v", s, chunk.MinTime) + require.LessOrEqual(t, s.T(), chunk.MaxTime, "sample %v after chunk max time %v", s, chunk.MaxTime) + } + } + gotSamples = append(gotSamples, smpls...) require.NoError(t, it.Err()) } - requireEqualSamples(t, series1.String(), expSamples, gotSamples, true) + if checkInUseBuckets { + requireEqualSamples(t, series1.String(), expSamples, gotSamples, requireEqualSamplesIgnoreCounterResets, requireEqualSamplesInUseBucketCompare) + } else { + requireEqualSamples(t, series1.String(), expSamples, gotSamples, requireEqualSamplesIgnoreCounterResets) + } }) } } @@ -8791,7 +8851,7 @@ func TestBlockQuerierAndBlockChunkQuerier(t *testing.T) { // Include blockID into series to identify which block got touched. serieses := []storage.Series{storage.NewListSeries(labels.FromMap(map[string]string{"block": fmt.Sprintf("block-%d", i), labels.MetricName: "test_metric"}), []chunks.Sample{sample{t: 0, f: 1}})} blockDir := createBlock(t, db.Dir(), serieses) - b, err := OpenBlock(db.logger, blockDir, db.chunkPool) + b, err := OpenBlock(db.logger, blockDir, db.chunkPool, nil) require.NoError(t, err) // Overwrite meta.json with compaction section for testing purpose. @@ -8837,23 +8897,151 @@ func TestBlockQuerierAndBlockChunkQuerier(t *testing.T) { } func TestGenerateCompactionDelay(t *testing.T) { - assertDelay := func(delay time.Duration) { + assertDelay := func(delay time.Duration, expectedMaxPercentDelay int) { t.Helper() require.GreaterOrEqual(t, delay, time.Duration(0)) - // Less than 10% of the chunkRange. - require.LessOrEqual(t, delay, 6000*time.Millisecond) + // Expect to generate a delay up to MaxPercentDelay of the head chunk range + require.LessOrEqual(t, delay, (time.Duration(60000*expectedMaxPercentDelay/100) * time.Millisecond)) } opts := DefaultOptions() - opts.EnableDelayedCompaction = true - db := openTestDB(t, opts, []int64{60000}) - defer func() { - require.NoError(t, db.Close()) - }() - // The offset is generated and changed while opening. - assertDelay(db.opts.CompactionDelay) + cases := []struct { + compactionDelayPercent int + }{ + { + compactionDelayPercent: 1, + }, + { + compactionDelayPercent: 10, + }, + { + compactionDelayPercent: 60, + }, + { + compactionDelayPercent: 100, + }, + } - for i := 0; i < 1000; i++ { - assertDelay(db.generateCompactionDelay()) + opts.EnableDelayedCompaction = true + + for _, c := range cases { + opts.CompactionDelayMaxPercent = c.compactionDelayPercent + db := openTestDB(t, opts, []int64{60000}) + defer func() { + require.NoError(t, db.Close()) + }() + // The offset is generated and changed while opening. + assertDelay(db.opts.CompactionDelay, c.compactionDelayPercent) + + for i := 0; i < 1000; i++ { + assertDelay(db.generateCompactionDelay(), c.compactionDelayPercent) + } + } +} + +type blockedResponseRecorder struct { + r *httptest.ResponseRecorder + + // writeblocked is used to block writing until the test wants it to resume. + writeBlocked chan struct{} + // writeStarted is closed by blockedResponseRecorder to signal that writing has started. + writeStarted chan struct{} +} + +func (br *blockedResponseRecorder) Write(buf []byte) (int, error) { + select { + case <-br.writeStarted: + default: + close(br.writeStarted) + } + + <-br.writeBlocked + return br.r.Write(buf) +} + +func (br *blockedResponseRecorder) Header() http.Header { return br.r.Header() } + +func (br *blockedResponseRecorder) WriteHeader(code int) { br.r.WriteHeader(code) } + +func (br *blockedResponseRecorder) Flush() { br.r.Flush() } + +// TestBlockClosingBlockedDuringRemoteRead ensures that a TSDB Block is not closed while it is being queried +// through remote read. This is a regression test for https://github.com/prometheus/prometheus/issues/14422. +// TODO: Ideally, this should reside in storage/remote/read_handler_test.go once the necessary TSDB utils are accessible there. +func TestBlockClosingBlockedDuringRemoteRead(t *testing.T) { + dir := t.TempDir() + + createBlock(t, dir, genSeries(2, 1, 0, 10)) + db, err := Open(dir, nil, nil, nil, nil) + require.NoError(t, err) + // No error checking as manually closing the block is supposed to make this fail. + defer db.Close() + + readAPI := remote.NewReadHandler(nil, nil, db, func() config.Config { + return config.Config{} + }, + 0, 1, 0, + ) + + matcher, err := labels.NewMatcher(labels.MatchRegexp, "__name__", ".*") + require.NoError(t, err) + + query, err := remote.ToQuery(0, 10, []*labels.Matcher{matcher}, nil) + require.NoError(t, err) + + req := &prompb.ReadRequest{ + Queries: []*prompb.Query{query}, + AcceptedResponseTypes: []prompb.ReadRequest_ResponseType{prompb.ReadRequest_STREAMED_XOR_CHUNKS}, + } + data, err := proto.Marshal(req) + require.NoError(t, err) + + request, err := http.NewRequest(http.MethodPost, "", bytes.NewBuffer(snappy.Encode(nil, data))) + require.NoError(t, err) + + blockedRecorder := &blockedResponseRecorder{ + r: httptest.NewRecorder(), + writeBlocked: make(chan struct{}), + writeStarted: make(chan struct{}), + } + + readDone := make(chan struct{}) + go func() { + readAPI.ServeHTTP(blockedRecorder, request) + require.Equal(t, http.StatusOK, blockedRecorder.r.Code) + close(readDone) + }() + + // Wait for the read API to start streaming data. + <-blockedRecorder.writeStarted + + // Try to close the queried block. + blockClosed := make(chan struct{}) + go func() { + for _, block := range db.Blocks() { + block.Close() + } + close(blockClosed) + }() + + // Closing the queried block should block. + // Wait a little bit to make sure of that. + select { + case <-time.After(100 * time.Millisecond): + case <-readDone: + require.Fail(t, "read API should still be streaming data.") + case <-blockClosed: + require.Fail(t, "Block shouldn't get closed while being queried.") + } + + // Resume the read API data streaming. + close(blockedRecorder.writeBlocked) + <-readDone + + // The block should be no longer needed and closing it should end. + select { + case <-time.After(10 * time.Millisecond): + require.Fail(t, "Closing the block timed out.") + case <-blockClosed: } } diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index d32870f70..31d461bed 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -152,13 +152,13 @@ func (ce *CircularExemplarStorage) Querier(_ context.Context) (storage.ExemplarQ func (ce *CircularExemplarStorage) Select(start, end int64, matchers ...[]*labels.Matcher) ([]exemplar.QueryResult, error) { ret := make([]exemplar.QueryResult, 0) + ce.lock.RLock() + defer ce.lock.RUnlock() + if len(ce.exemplars) == 0 { return ret, nil } - ce.lock.RLock() - defer ce.lock.RUnlock() - // Loop through each index entry, which will point us to first/last exemplar for each series. for _, idx := range ce.index { var se exemplar.QueryResult @@ -281,13 +281,13 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { l = 0 } + ce.lock.Lock() + defer ce.lock.Unlock() + if l == int64(len(ce.exemplars)) { return 0 } - ce.lock.Lock() - defer ce.lock.Unlock() - oldBuffer := ce.exemplars oldNextIndex := int64(ce.nextIndex) @@ -349,6 +349,11 @@ func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry, buf []byt } func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemplar) error { + // TODO(bwplotka): This lock can lock all scrapers, there might high contention on this on scale. + // Optimize by moving the lock to be per series (& benchmark it). + ce.lock.Lock() + defer ce.lock.Unlock() + if len(ce.exemplars) == 0 { return storage.ErrExemplarsDisabled } @@ -356,11 +361,6 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp var buf [1024]byte seriesLabels := l.Bytes(buf[:]) - // TODO(bwplotka): This lock can lock all scrapers, there might high contention on this on scale. - // Optimize by moving the lock to be per series (& benchmark it). - ce.lock.Lock() - defer ce.lock.Unlock() - idx, ok := ce.index[string(seriesLabels)] err := ce.validateExemplar(idx, e, true) if err != nil { diff --git a/tsdb/exemplar_test.go b/tsdb/exemplar_test.go index 7723ec389..dbd34cc48 100644 --- a/tsdb/exemplar_test.go +++ b/tsdb/exemplar_test.go @@ -20,6 +20,7 @@ import ( "reflect" "strconv" "strings" + "sync" "testing" "github.com/prometheus/client_golang/prometheus" @@ -499,3 +500,40 @@ func BenchmarkResizeExemplars(b *testing.B) { }) } } + +// TestCircularExemplarStorage_Concurrent_AddExemplar_Resize tries to provoke a data race between AddExemplar and Resize. +// Run with race detection enabled. +func TestCircularExemplarStorage_Concurrent_AddExemplar_Resize(t *testing.T) { + exs, err := NewCircularExemplarStorage(0, eMetrics) + require.NoError(t, err) + es := exs.(*CircularExemplarStorage) + + l := labels.FromStrings("service", "asdf") + e := exemplar.Exemplar{ + Labels: labels.FromStrings("trace_id", "qwerty"), + Value: 0.1, + Ts: 1, + } + + var wg sync.WaitGroup + wg.Add(1) + t.Cleanup(wg.Wait) + + started := make(chan struct{}) + + go func() { + defer wg.Done() + + <-started + for i := 0; i < 100; i++ { + require.NoError(t, es.AddExemplar(l, e)) + } + }() + + for i := 0; i < 100; i++ { + es.Resize(int64(i + 1)) + if i == 0 { + close(started) + } + } +} diff --git a/tsdb/head.go b/tsdb/head.go index 2963d781d..c67c438e5 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -155,10 +155,6 @@ type HeadOptions struct { // OutOfOrderTimeWindow is > 0 EnableOOONativeHistograms atomic.Bool - // EnableCreatedTimestampZeroIngestion enables the ingestion of the created timestamp as a synthetic zero sample. - // See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md - EnableCreatedTimestampZeroIngestion bool - ChunkRange int64 // ChunkDirRoot is the parent directory of the chunks directory. ChunkDirRoot string diff --git a/tsdb/head_append.go b/tsdb/head_append.go index adfd5d4bf..ea2a163f2 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -40,6 +40,12 @@ type initAppender struct { var _ storage.GetRef = &initAppender{} +func (a *initAppender) SetOptions(opts *storage.AppendOptions) { + if a.app != nil { + a.app.SetOptions(opts) + } +} + func (a *initAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { if a.app != nil { return a.app.Append(ref, lset, t, v) @@ -326,6 +332,11 @@ type headAppender struct { appendID, cleanupAppendIDsBelow uint64 closed bool + hints *storage.AppendOptions +} + +func (a *headAppender) SetOptions(opts *storage.AppendOptions) { + a.hints = opts } func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { @@ -345,27 +356,32 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 } } + s.Lock() if value.IsStaleNaN(v) { - // This is not thread safe as we should be holding the lock for "s". // TODO(krajorama): reorganize Commit() to handle samples in append order // not floats first and then histograms. Then we could do this conversion // in commit. This code should move into Commit(). switch { case s.lastHistogramValue != nil: + s.Unlock() return a.AppendHistogram(ref, lset, t, &histogram.Histogram{Sum: v}, nil) case s.lastFloatHistogramValue != nil: + s.Unlock() return a.AppendHistogram(ref, lset, t, nil, &histogram.FloatHistogram{Sum: v}) } } - s.Lock() + defer s.Unlock() // TODO(codesome): If we definitely know at this point that the sample is ooo, then optimise // to skip that sample from the WAL and write only in the WBL. - _, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow) + isOOO, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow) if err == nil { + if isOOO && a.hints != nil && a.hints.DiscardOutOfOrder { + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + return 0, storage.ErrOutOfOrderSample + } s.pendingCommit = true } - s.Unlock() if delta > 0 { a.head.metrics.oooHistogram.Observe(float64(delta) / 1000) } @@ -458,9 +474,10 @@ func (a *headAppender) getOrCreate(lset labels.Labels) (s *memSeries, created bo return s, created, nil } -// appendable checks whether the given sample is valid for appending to the series. (if we return false and no error) -// The sample belongs to the out of order chunk if we return true and no error. -// An error signifies the sample cannot be handled. +// appendable checks whether the given sample is valid for appending to the series. +// If the sample is valid and in-order, it returns false with no error. +// If the sample belongs to the out-of-order chunk, it returns true with no error. +// If the sample cannot be handled, it returns an error. func (s *memSeries) appendable(t int64, v float64, headMaxt, minValidTime, oooTimeWindow int64) (isOOO bool, oooDelta int64, err error) { // Check if we can append in the in-order chunk. if t >= minValidTime { @@ -967,23 +984,38 @@ func exemplarsForEncoding(es []exemplarWithSeriesRef) []record.RefExemplar { return ret } -// Commit writes to the WAL and adds the data to the Head. -// TODO(codesome): Refactor this method to reduce indentation and make it more readable. -func (a *headAppender) Commit() (err error) { - if a.closed { - return ErrAppenderClosed - } - defer func() { a.closed = true }() - - if err := a.log(); err != nil { - _ = a.Rollback() // Most likely the same error will happen again. - return fmt.Errorf("write to WAL: %w", err) - } - - if a.head.writeNotified != nil { - a.head.writeNotified.Notify() - } +type appenderCommitContext struct { + floatsAppended int + histogramsAppended int + // Number of samples out of order but accepted: with ooo enabled and within time window. + oooFloatsAccepted int + oooHistogramAccepted int + // Number of samples rejected due to: out of order but OOO support disabled. + floatOOORejected int + histoOOORejected int + // Number of samples rejected due to: out of order but too old (OOO support enabled, but outside time window). + floatTooOldRejected int + histoTooOldRejected int + // Number of samples rejected due to: out of bounds: with t < minValidTime (OOO support disabled). + floatOOBRejected int + histoOOBRejected int + inOrderMint int64 + inOrderMaxt int64 + oooMinT int64 + oooMaxT int64 + wblSamples []record.RefSample + wblHistograms []record.RefHistogramSample + wblFloatHistograms []record.RefFloatHistogramSample + oooMmapMarkers map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef + oooMmapMarkersCount int + oooRecords [][]byte + oooCapMax int64 + appendChunkOpts chunkOpts + enc record.Encoder +} +// commitExemplars adds all exemplars from headAppender to the head's exemplar storage. +func (a *headAppender) commitExemplars() { // No errors logging to WAL, so pass the exemplars along to the in memory storage. for _, e := range a.exemplars { s := a.head.series.getByID(chunks.HeadSeriesRef(e.ref)) @@ -1001,6 +1033,396 @@ func (a *headAppender) Commit() (err error) { a.head.logger.Debug("Unknown error while adding exemplar", "err", err) } } +} + +func (acc *appenderCommitContext) collectOOORecords(a *headAppender) { + if a.head.wbl == nil { + // WBL is not enabled. So no need to collect. + acc.wblSamples = nil + acc.wblHistograms = nil + acc.wblFloatHistograms = nil + acc.oooMmapMarkers = nil + acc.oooMmapMarkersCount = 0 + return + } + + // The m-map happens before adding a new sample. So we collect + // the m-map markers first, and then samples. + // WBL Graphically: + // WBL Before this Commit(): [old samples before this commit for chunk 1] + // WBL After this Commit(): [old samples before this commit for chunk 1][new samples in this commit for chunk 1]mmapmarker1[samples for chunk 2]mmapmarker2[samples for chunk 3] + if acc.oooMmapMarkers != nil { + markers := make([]record.RefMmapMarker, 0, acc.oooMmapMarkersCount) + for ref, mmapRefs := range acc.oooMmapMarkers { + for _, mmapRef := range mmapRefs { + markers = append(markers, record.RefMmapMarker{ + Ref: ref, + MmapRef: mmapRef, + }) + } + } + r := acc.enc.MmapMarkers(markers, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + + if len(acc.wblSamples) > 0 { + r := acc.enc.Samples(acc.wblSamples, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + if len(acc.wblHistograms) > 0 { + r := acc.enc.HistogramSamples(acc.wblHistograms, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + if len(acc.wblFloatHistograms) > 0 { + r := acc.enc.FloatHistogramSamples(acc.wblFloatHistograms, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + + acc.wblSamples = nil + acc.wblHistograms = nil + acc.wblFloatHistograms = nil + acc.oooMmapMarkers = nil +} + +// handleAppendableError processes errors encountered during sample appending and updates +// the provided counters accordingly. +// +// Parameters: +// - err: The error encountered during appending. +// - appended: Pointer to the counter tracking the number of successfully appended samples. +// - oooRejected: Pointer to the counter tracking the number of out-of-order samples rejected. +// - oobRejected: Pointer to the counter tracking the number of out-of-bounds samples rejected. +// - tooOldRejected: Pointer to the counter tracking the number of too-old samples rejected. +func handleAppendableError(err error, appended, oooRejected, oobRejected, tooOldRejected *int) { + switch { + case errors.Is(err, storage.ErrOutOfOrderSample): + *appended-- + *oooRejected++ + case errors.Is(err, storage.ErrOutOfBounds): + *appended-- + *oobRejected++ + case errors.Is(err, storage.ErrTooOldSample): + *appended-- + *tooOldRejected++ + default: + *appended-- + } +} + +// commitSamples processes and commits the samples in the headAppender to the series. +// It handles both in-order and out-of-order samples, updating the appenderCommitContext +// with the results of the append operations. +// +// The function iterates over the samples in the headAppender and attempts to append each sample +// to its corresponding series. It handles various error cases such as out-of-order samples, +// out-of-bounds samples, and too-old samples, updating the appenderCommitContext accordingly. +// +// For out-of-order samples, it checks if the sample can be inserted into the series and updates +// the out-of-order mmap markers if necessary. It also updates the write-ahead log (WBL) samples +// and the minimum and maximum timestamps for out-of-order samples. +// +// For in-order samples, it attempts to append the sample to the series and updates the minimum +// and maximum timestamps for in-order samples. +// +// The function also increments the chunk metrics if a new chunk is created and performs cleanup +// operations on the series after appending the samples. +// +// There are also specific functions to commit histograms and float histograms. +func (a *headAppender) commitSamples(acc *appenderCommitContext) { + var ok, chunkCreated bool + var series *memSeries + + for i, s := range a.samples { + series = a.sampleSeries[i] + series.Lock() + + oooSample, _, err := series.appendable(s.T, s.V, a.headMaxt, a.minValidTime, a.oooTimeWindow) + if err != nil { + handleAppendableError(err, &acc.floatsAppended, &acc.floatOOORejected, &acc.floatOOBRejected, &acc.floatTooOldRejected) + } + + switch { + case err != nil: + // Do nothing here. + case oooSample: + // Sample is OOO and OOO handling is enabled + // and the delta is within the OOO tolerance. + var mmapRefs []chunks.ChunkDiskMapperRef + ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger) + if chunkCreated { + r, ok := acc.oooMmapMarkers[series.ref] + if !ok || r != nil { + // !ok means there are no markers collected for these samples yet. So we first flush the samples + // before setting this m-map marker. + + // r != nil means we have already m-mapped a chunk for this series in the same Commit(). + // Hence, before we m-map again, we should add the samples and m-map markers + // seen till now to the WBL records. + acc.collectOOORecords(a) + } + + if acc.oooMmapMarkers == nil { + acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) + } + if len(mmapRefs) > 0 { + acc.oooMmapMarkers[series.ref] = mmapRefs + acc.oooMmapMarkersCount += len(mmapRefs) + } else { + // No chunk was written to disk, so we need to set an initial marker for this series. + acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} + acc.oooMmapMarkersCount++ + } + } + if ok { + acc.wblSamples = append(acc.wblSamples, s) + if s.T < acc.oooMinT { + acc.oooMinT = s.T + } + if s.T > acc.oooMaxT { + acc.oooMaxT = s.T + } + acc.oooFloatsAccepted++ + } else { + // Sample is an exact duplicate of the last sample. + // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, + // not with samples in already flushed OOO chunks. + // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. + acc.floatsAppended-- + } + default: + ok, chunkCreated = series.append(s.T, s.V, a.appendID, acc.appendChunkOpts) + if ok { + if s.T < acc.inOrderMint { + acc.inOrderMint = s.T + } + if s.T > acc.inOrderMaxt { + acc.inOrderMaxt = s.T + } + } else { + // The sample is an exact duplicate, and should be silently dropped. + acc.floatsAppended-- + } + } + + if chunkCreated { + a.head.metrics.chunks.Inc() + a.head.metrics.chunksCreated.Inc() + } + + series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) + series.pendingCommit = false + series.Unlock() + } +} + +// For details on the commitHistograms function, see the commitSamples docs. +func (a *headAppender) commitHistograms(acc *appenderCommitContext) { + var ok, chunkCreated bool + var series *memSeries + + for i, s := range a.histograms { + series = a.histogramSeries[i] + series.Lock() + + oooSample, _, err := series.appendableHistogram(s.T, s.H, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load()) + if err != nil { + handleAppendableError(err, &acc.histogramsAppended, &acc.histoOOORejected, &acc.histoOOBRejected, &acc.histoTooOldRejected) + } + + switch { + case err != nil: + // Do nothing here. + case oooSample: + // Sample is OOO and OOO handling is enabled + // and the delta is within the OOO tolerance. + var mmapRefs []chunks.ChunkDiskMapperRef + ok, chunkCreated, mmapRefs = series.insert(s.T, 0, s.H, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger) + if chunkCreated { + r, ok := acc.oooMmapMarkers[series.ref] + if !ok || r != nil { + // !ok means there are no markers collected for these samples yet. So we first flush the samples + // before setting this m-map marker. + + // r != 0 means we have already m-mapped a chunk for this series in the same Commit(). + // Hence, before we m-map again, we should add the samples and m-map markers + // seen till now to the WBL records. + acc.collectOOORecords(a) + } + + if acc.oooMmapMarkers == nil { + acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) + } + if len(mmapRefs) > 0 { + acc.oooMmapMarkers[series.ref] = mmapRefs + acc.oooMmapMarkersCount += len(mmapRefs) + } else { + // No chunk was written to disk, so we need to set an initial marker for this series. + acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} + acc.oooMmapMarkersCount++ + } + } + if ok { + acc.wblHistograms = append(acc.wblHistograms, s) + if s.T < acc.oooMinT { + acc.oooMinT = s.T + } + if s.T > acc.oooMaxT { + acc.oooMaxT = s.T + } + acc.oooHistogramAccepted++ + } else { + // Sample is an exact duplicate of the last sample. + // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, + // not with samples in already flushed OOO chunks. + // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. + acc.histogramsAppended-- + } + default: + ok, chunkCreated = series.appendHistogram(s.T, s.H, a.appendID, acc.appendChunkOpts) + if ok { + if s.T < acc.inOrderMint { + acc.inOrderMint = s.T + } + if s.T > acc.inOrderMaxt { + acc.inOrderMaxt = s.T + } + } else { + acc.histogramsAppended-- + acc.histoOOORejected++ + } + } + + if chunkCreated { + a.head.metrics.chunks.Inc() + a.head.metrics.chunksCreated.Inc() + } + + series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) + series.pendingCommit = false + series.Unlock() + } +} + +// For details on the commitFloatHistograms function, see the commitSamples docs. +func (a *headAppender) commitFloatHistograms(acc *appenderCommitContext) { + var ok, chunkCreated bool + var series *memSeries + + for i, s := range a.floatHistograms { + series = a.floatHistogramSeries[i] + series.Lock() + + oooSample, _, err := series.appendableFloatHistogram(s.T, s.FH, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load()) + if err != nil { + handleAppendableError(err, &acc.histogramsAppended, &acc.histoOOORejected, &acc.histoOOBRejected, &acc.histoTooOldRejected) + } + + switch { + case err != nil: + // Do nothing here. + case oooSample: + // Sample is OOO and OOO handling is enabled + // and the delta is within the OOO tolerance. + var mmapRefs []chunks.ChunkDiskMapperRef + ok, chunkCreated, mmapRefs = series.insert(s.T, 0, nil, s.FH, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger) + if chunkCreated { + r, ok := acc.oooMmapMarkers[series.ref] + if !ok || r != nil { + // !ok means there are no markers collected for these samples yet. So we first flush the samples + // before setting this m-map marker. + + // r != 0 means we have already m-mapped a chunk for this series in the same Commit(). + // Hence, before we m-map again, we should add the samples and m-map markers + // seen till now to the WBL records. + acc.collectOOORecords(a) + } + + if acc.oooMmapMarkers == nil { + acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) + } + if len(mmapRefs) > 0 { + acc.oooMmapMarkers[series.ref] = mmapRefs + acc.oooMmapMarkersCount += len(mmapRefs) + } else { + // No chunk was written to disk, so we need to set an initial marker for this series. + acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} + acc.oooMmapMarkersCount++ + } + } + if ok { + acc.wblFloatHistograms = append(acc.wblFloatHistograms, s) + if s.T < acc.oooMinT { + acc.oooMinT = s.T + } + if s.T > acc.oooMaxT { + acc.oooMaxT = s.T + } + acc.oooHistogramAccepted++ + } else { + // Sample is an exact duplicate of the last sample. + // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, + // not with samples in already flushed OOO chunks. + // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. + acc.histogramsAppended-- + } + default: + ok, chunkCreated = series.appendFloatHistogram(s.T, s.FH, a.appendID, acc.appendChunkOpts) + if ok { + if s.T < acc.inOrderMint { + acc.inOrderMint = s.T + } + if s.T > acc.inOrderMaxt { + acc.inOrderMaxt = s.T + } + } else { + acc.histogramsAppended-- + acc.histoOOORejected++ + } + } + + if chunkCreated { + a.head.metrics.chunks.Inc() + a.head.metrics.chunksCreated.Inc() + } + + series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) + series.pendingCommit = false + series.Unlock() + } +} + +// commitMetadata commits the metadata for each series in the headAppender. +// It iterates over the metadata slice and updates the corresponding series +// with the new metadata information. The series is locked during the update +// to ensure thread safety. +func (a *headAppender) commitMetadata() { + var series *memSeries + for i, m := range a.metadata { + series = a.metadataSeries[i] + series.Lock() + series.meta = &metadata.Metadata{Type: record.ToMetricType(m.Type), Unit: m.Unit, Help: m.Help} + series.Unlock() + } +} + +// Commit writes to the WAL and adds the data to the Head. +// TODO(codesome): Refactor this method to reduce indentation and make it more readable. +func (a *headAppender) Commit() (err error) { + if a.closed { + return ErrAppenderClosed + } + defer func() { a.closed = true }() + + if err := a.log(); err != nil { + _ = a.Rollback() // Most likely the same error will happen again. + return fmt.Errorf("write to WAL: %w", err) + } + + if a.head.writeNotified != nil { + a.head.writeNotified.Notify() + } + + a.commitExemplars() defer a.head.metrics.activeAppenders.Dec() defer a.head.putAppendBuffer(a.samples) @@ -1011,401 +1433,46 @@ func (a *headAppender) Commit() (err error) { defer a.head.putMetadataBuffer(a.metadata) defer a.head.iso.closeAppend(a.appendID) - var ( - floatsAppended = len(a.samples) - histogramsAppended = len(a.histograms) + len(a.floatHistograms) - // number of samples out of order but accepted: with ooo enabled and within time window - oooFloatsAccepted int - oooHistogramAccepted int - // number of samples rejected due to: out of order but OOO support disabled. - floatOOORejected int - histoOOORejected int - // number of samples rejected due to: that are out of order but too old (OOO support enabled, but outside time window) - floatTooOldRejected int - histoTooOldRejected int - // number of samples rejected due to: out of bounds: with t < minValidTime (OOO support disabled) - floatOOBRejected int - histoOOBRejected int - inOrderMint int64 = math.MaxInt64 - inOrderMaxt int64 = math.MinInt64 - oooMinT int64 = math.MaxInt64 - oooMaxT int64 = math.MinInt64 - wblSamples []record.RefSample - wblHistograms []record.RefHistogramSample - wblFloatHistograms []record.RefFloatHistogramSample - oooMmapMarkers map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef - oooMmapMarkersCount int - oooRecords [][]byte - oooCapMax = a.head.opts.OutOfOrderCapMax.Load() - series *memSeries - appendChunkOpts = chunkOpts{ + acc := &appenderCommitContext{ + floatsAppended: len(a.samples), + histogramsAppended: len(a.histograms) + len(a.floatHistograms), + inOrderMint: math.MaxInt64, + inOrderMaxt: math.MinInt64, + oooMinT: math.MaxInt64, + oooMaxT: math.MinInt64, + oooCapMax: a.head.opts.OutOfOrderCapMax.Load(), + appendChunkOpts: chunkOpts{ chunkDiskMapper: a.head.chunkDiskMapper, chunkRange: a.head.chunkRange.Load(), samplesPerChunk: a.head.opts.SamplesPerChunk, - } - enc record.Encoder - ) + }, + } + defer func() { - for i := range oooRecords { - a.head.putBytesBuffer(oooRecords[i][:0]) + for i := range acc.oooRecords { + a.head.putBytesBuffer(acc.oooRecords[i][:0]) } }() - collectOOORecords := func() { - if a.head.wbl == nil { - // WBL is not enabled. So no need to collect. - wblSamples = nil - wblHistograms = nil - wblFloatHistograms = nil - oooMmapMarkers = nil - oooMmapMarkersCount = 0 - return - } - // The m-map happens before adding a new sample. So we collect - // the m-map markers first, and then samples. - // WBL Graphically: - // WBL Before this Commit(): [old samples before this commit for chunk 1] - // WBL After this Commit(): [old samples before this commit for chunk 1][new samples in this commit for chunk 1]mmapmarker1[samples for chunk 2]mmapmarker2[samples for chunk 3] - if oooMmapMarkers != nil { - markers := make([]record.RefMmapMarker, 0, oooMmapMarkersCount) - for ref, mmapRefs := range oooMmapMarkers { - for _, mmapRef := range mmapRefs { - markers = append(markers, record.RefMmapMarker{ - Ref: ref, - MmapRef: mmapRef, - }) - } - } - r := enc.MmapMarkers(markers, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } - if len(wblSamples) > 0 { - r := enc.Samples(wblSamples, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } - if len(wblHistograms) > 0 { - r := enc.HistogramSamples(wblHistograms, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } - if len(wblFloatHistograms) > 0 { - r := enc.FloatHistogramSamples(wblFloatHistograms, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } + a.commitSamples(acc) + a.commitHistograms(acc) + a.commitFloatHistograms(acc) + a.commitMetadata() - wblSamples = nil - wblHistograms = nil - wblFloatHistograms = nil - oooMmapMarkers = nil - } - for i, s := range a.samples { - series = a.sampleSeries[i] - series.Lock() + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOORejected)) + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histoOOORejected)) + a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOBRejected)) + a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatTooOldRejected)) + a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatsAppended)) + a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histogramsAppended)) + a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.oooFloatsAccepted)) + a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.oooHistogramAccepted)) + a.head.updateMinMaxTime(acc.inOrderMint, acc.inOrderMaxt) + a.head.updateMinOOOMaxOOOTime(acc.oooMinT, acc.oooMaxT) - oooSample, _, err := series.appendable(s.T, s.V, a.headMaxt, a.minValidTime, a.oooTimeWindow) - switch { - case err == nil: - // Do nothing. - case errors.Is(err, storage.ErrOutOfOrderSample): - floatsAppended-- - floatOOORejected++ - case errors.Is(err, storage.ErrOutOfBounds): - floatsAppended-- - floatOOBRejected++ - case errors.Is(err, storage.ErrTooOldSample): - floatsAppended-- - floatTooOldRejected++ - default: - floatsAppended-- - } - - var ok, chunkCreated bool - - switch { - case err != nil: - // Do nothing here. - case oooSample: - // Sample is OOO and OOO handling is enabled - // and the delta is within the OOO tolerance. - var mmapRefs []chunks.ChunkDiskMapperRef - ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, oooCapMax, a.head.logger) - if chunkCreated { - r, ok := oooMmapMarkers[series.ref] - if !ok || r != nil { - // !ok means there are no markers collected for these samples yet. So we first flush the samples - // before setting this m-map marker. - - // r != nil means we have already m-mapped a chunk for this series in the same Commit(). - // Hence, before we m-map again, we should add the samples and m-map markers - // seen till now to the WBL records. - collectOOORecords() - } - - if oooMmapMarkers == nil { - oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) - } - if len(mmapRefs) > 0 { - oooMmapMarkers[series.ref] = mmapRefs - oooMmapMarkersCount += len(mmapRefs) - } else { - // No chunk was written to disk, so we need to set an initial marker for this series. - oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} - oooMmapMarkersCount++ - } - } - if ok { - wblSamples = append(wblSamples, s) - if s.T < oooMinT { - oooMinT = s.T - } - if s.T > oooMaxT { - oooMaxT = s.T - } - oooFloatsAccepted++ - } else { - // Sample is an exact duplicate of the last sample. - // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, - // not with samples in already flushed OOO chunks. - // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. - floatsAppended-- - } - default: - ok, chunkCreated = series.append(s.T, s.V, a.appendID, appendChunkOpts) - if ok { - if s.T < inOrderMint { - inOrderMint = s.T - } - if s.T > inOrderMaxt { - inOrderMaxt = s.T - } - } else { - // The sample is an exact duplicate, and should be silently dropped. - floatsAppended-- - } - } - - if chunkCreated { - a.head.metrics.chunks.Inc() - a.head.metrics.chunksCreated.Inc() - } - - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - } - - for i, s := range a.histograms { - series = a.histogramSeries[i] - series.Lock() - - oooSample, _, err := series.appendableHistogram(s.T, s.H, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load()) - switch { - case err == nil: - // Do nothing. - case errors.Is(err, storage.ErrOutOfOrderSample): - histogramsAppended-- - histoOOORejected++ - case errors.Is(err, storage.ErrOutOfBounds): - histogramsAppended-- - histoOOBRejected++ - case errors.Is(err, storage.ErrTooOldSample): - histogramsAppended-- - histoTooOldRejected++ - default: - histogramsAppended-- - } - - var ok, chunkCreated bool - - switch { - case err != nil: - // Do nothing here. - case oooSample: - // Sample is OOO and OOO handling is enabled - // and the delta is within the OOO tolerance. - var mmapRefs []chunks.ChunkDiskMapperRef - ok, chunkCreated, mmapRefs = series.insert(s.T, 0, s.H, nil, a.head.chunkDiskMapper, oooCapMax, a.head.logger) - if chunkCreated { - r, ok := oooMmapMarkers[series.ref] - if !ok || r != nil { - // !ok means there are no markers collected for these samples yet. So we first flush the samples - // before setting this m-map marker. - - // r != 0 means we have already m-mapped a chunk for this series in the same Commit(). - // Hence, before we m-map again, we should add the samples and m-map markers - // seen till now to the WBL records. - collectOOORecords() - } - - if oooMmapMarkers == nil { - oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) - } - if len(mmapRefs) > 0 { - oooMmapMarkers[series.ref] = mmapRefs - oooMmapMarkersCount += len(mmapRefs) - } else { - // No chunk was written to disk, so we need to set an initial marker for this series. - oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} - oooMmapMarkersCount++ - } - } - if ok { - wblHistograms = append(wblHistograms, s) - if s.T < oooMinT { - oooMinT = s.T - } - if s.T > oooMaxT { - oooMaxT = s.T - } - oooHistogramAccepted++ - } else { - // Sample is an exact duplicate of the last sample. - // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, - // not with samples in already flushed OOO chunks. - // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. - histogramsAppended-- - } - default: - ok, chunkCreated = series.appendHistogram(s.T, s.H, a.appendID, appendChunkOpts) - if ok { - if s.T < inOrderMint { - inOrderMint = s.T - } - if s.T > inOrderMaxt { - inOrderMaxt = s.T - } - } else { - histogramsAppended-- - histoOOORejected++ - } - } - - if chunkCreated { - a.head.metrics.chunks.Inc() - a.head.metrics.chunksCreated.Inc() - } - - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - } - - for i, s := range a.floatHistograms { - series = a.floatHistogramSeries[i] - series.Lock() - - oooSample, _, err := series.appendableFloatHistogram(s.T, s.FH, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load()) - switch { - case err == nil: - // Do nothing. - case errors.Is(err, storage.ErrOutOfOrderSample): - histogramsAppended-- - histoOOORejected++ - case errors.Is(err, storage.ErrOutOfBounds): - histogramsAppended-- - histoOOBRejected++ - case errors.Is(err, storage.ErrTooOldSample): - histogramsAppended-- - histoTooOldRejected++ - default: - histogramsAppended-- - } - - var ok, chunkCreated bool - - switch { - case err != nil: - // Do nothing here. - case oooSample: - // Sample is OOO and OOO handling is enabled - // and the delta is within the OOO tolerance. - var mmapRefs []chunks.ChunkDiskMapperRef - ok, chunkCreated, mmapRefs = series.insert(s.T, 0, nil, s.FH, a.head.chunkDiskMapper, oooCapMax, a.head.logger) - if chunkCreated { - r, ok := oooMmapMarkers[series.ref] - if !ok || r != nil { - // !ok means there are no markers collected for these samples yet. So we first flush the samples - // before setting this m-map marker. - - // r != 0 means we have already m-mapped a chunk for this series in the same Commit(). - // Hence, before we m-map again, we should add the samples and m-map markers - // seen till now to the WBL records. - collectOOORecords() - } - - if oooMmapMarkers == nil { - oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) - } - if len(mmapRefs) > 0 { - oooMmapMarkers[series.ref] = mmapRefs - oooMmapMarkersCount += len(mmapRefs) - } else { - // No chunk was written to disk, so we need to set an initial marker for this series. - oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} - oooMmapMarkersCount++ - } - } - if ok { - wblFloatHistograms = append(wblFloatHistograms, s) - if s.T < oooMinT { - oooMinT = s.T - } - if s.T > oooMaxT { - oooMaxT = s.T - } - oooHistogramAccepted++ - } else { - // Sample is an exact duplicate of the last sample. - // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, - // not with samples in already flushed OOO chunks. - // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. - histogramsAppended-- - } - default: - ok, chunkCreated = series.appendFloatHistogram(s.T, s.FH, a.appendID, appendChunkOpts) - if ok { - if s.T < inOrderMint { - inOrderMint = s.T - } - if s.T > inOrderMaxt { - inOrderMaxt = s.T - } - } else { - histogramsAppended-- - histoOOORejected++ - } - } - - if chunkCreated { - a.head.metrics.chunks.Inc() - a.head.metrics.chunksCreated.Inc() - } - - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - } - - for i, m := range a.metadata { - series = a.metadataSeries[i] - series.Lock() - series.meta = &metadata.Metadata{Type: record.ToMetricType(m.Type), Unit: m.Unit, Help: m.Help} - series.Unlock() - } - - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatOOORejected)) - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Add(float64(histoOOORejected)) - a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatOOBRejected)) - a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatTooOldRejected)) - a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatsAppended)) - a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(histogramsAppended)) - a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(oooFloatsAccepted)) - a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(oooHistogramAccepted)) - a.head.updateMinMaxTime(inOrderMint, inOrderMaxt) - a.head.updateMinOOOMaxOOOTime(oooMinT, oooMaxT) - - collectOOORecords() + acc.collectOOORecords(a) if a.head.wbl != nil { - if err := a.head.wbl.Log(oooRecords...); err != nil { + if err := a.head.wbl.Log(acc.oooRecords...); err != nil { // TODO(codesome): Currently WBL logging of ooo samples is best effort here since we cannot try logging // until we have found what samples become OOO. We can try having a metric for this failure. // Returning the error here is not correct because we have already put the samples into the memory, @@ -1450,7 +1517,7 @@ type chunkOpts struct { // append adds the sample (t, v) to the series. The caller also has to provide // the appendID for isolation. (The appendID can be zero, which results in no // isolation for this append.) -// It is unsafe to call this concurrently with s.iterator(...) without holding the series lock. +// Series lock must be held when calling. func (s *memSeries) append(t int64, v float64, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) { c, sampleInOrder, chunkCreated := s.appendPreprocessor(t, chunkenc.EncXOR, o) if !sampleInOrder { diff --git a/tsdb/head_bench_test.go b/tsdb/head_bench_test.go index 51de50ec2..dc682602b 100644 --- a/tsdb/head_bench_test.go +++ b/tsdb/head_bench_test.go @@ -14,15 +14,22 @@ package tsdb import ( + "context" "errors" + "fmt" + "math/rand" "strconv" "testing" "github.com/stretchr/testify/require" "go.uber.org/atomic" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunks" + "github.com/prometheus/prometheus/tsdb/wlog" ) func BenchmarkHeadStripeSeriesCreate(b *testing.B) { @@ -36,7 +43,7 @@ func BenchmarkHeadStripeSeriesCreate(b *testing.B) { defer h.Close() for i := 0; i < b.N; i++ { - h.getOrCreate(uint64(i), labels.FromStrings(labels.MetricName, "test", "a", strconv.Itoa(i), "b", strconv.Itoa(i%10), "c", strconv.Itoa(i%100), "d", strconv.Itoa(i/2), "e", strconv.Itoa(i/4))) + h.getOrCreate(uint64(i), labels.FromStrings("a", strconv.Itoa(i))) } } @@ -54,8 +61,8 @@ func BenchmarkHeadStripeSeriesCreateParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - i := int(count.Inc()) - h.getOrCreate(uint64(i), labels.FromStrings(labels.MetricName, "test", "a", strconv.Itoa(i), "b", strconv.Itoa(i%10), "c", strconv.Itoa(i%100), "d", strconv.Itoa(i/2), "e", strconv.Itoa(i/4))) + i := count.Inc() + h.getOrCreate(uint64(i), labels.FromStrings("a", strconv.Itoa(int(i)))) } }) } @@ -75,7 +82,87 @@ func BenchmarkHeadStripeSeriesCreate_PreCreationFailure(b *testing.B) { defer h.Close() for i := 0; i < b.N; i++ { - h.getOrCreate(uint64(i), labels.FromStrings(labels.MetricName, "test", "a", strconv.Itoa(i), "b", strconv.Itoa(i%10), "c", strconv.Itoa(i%100), "d", strconv.Itoa(i/2), "e", strconv.Itoa(i/4))) + h.getOrCreate(uint64(i), labels.FromStrings("a", strconv.Itoa(i))) + } +} + +func BenchmarkHead_WalCommit(b *testing.B) { + seriesCounts := []int{100, 1000, 10000} + series := genSeries(10000, 10, 0, 0) // Only using the generated labels. + + appendSamples := func(b *testing.B, app storage.Appender, seriesCount int, ts int64) { + var err error + for i, s := range series[:seriesCount] { + var ref storage.SeriesRef + // if i is even, append a sample, else append a histogram. + if i%2 == 0 { + ref, err = app.Append(ref, s.Labels(), ts, float64(ts)) + } else { + h := &histogram.Histogram{ + Count: 7 + uint64(ts*5), + ZeroCount: 2 + uint64(ts), + ZeroThreshold: 0.001, + Sum: 18.4 * rand.Float64(), + Schema: 1, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{ts + 1, 1, -1, 0}, + } + ref, err = app.AppendHistogram(ref, s.Labels(), ts, h, nil) + } + require.NoError(b, err) + + _, err = app.AppendExemplar(ref, s.Labels(), exemplar.Exemplar{ + Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())), + Value: rand.Float64(), + Ts: ts, + }) + require.NoError(b, err) + } + } + + for _, seriesCount := range seriesCounts { + b.Run(fmt.Sprintf("%d series", seriesCount), func(b *testing.B) { + for _, commits := range []int64{1, 2} { // To test commits that create new series and when the series already exists. + b.Run(fmt.Sprintf("%d commits", commits), func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + b.StopTimer() + h, w := newTestHead(b, 10000, wlog.CompressionNone, false) + b.Cleanup(func() { + if h != nil { + h.Close() + } + if w != nil { + w.Close() + } + }) + app := h.Appender(context.Background()) + + appendSamples(b, app, seriesCount, 0) + + b.StartTimer() + require.NoError(b, app.Commit()) + if commits == 2 { + b.StopTimer() + app = h.Appender(context.Background()) + appendSamples(b, app, seriesCount, 1) + b.StartTimer() + require.NoError(b, app.Commit()) + } + b.StopTimer() + h.Close() + h = nil + w.Close() + w = nil + } + }) + } + }) } } diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 671e85cd7..cc9daa97f 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -5178,7 +5178,7 @@ func testWBLReplay(t *testing.T, scenario sampleTypeScenario) { // Passing in true for the 'ignoreCounterResets' parameter prevents differences in counter reset headers // from being factored in to the sample comparison // TODO(fionaliao): understand counter reset behaviour, might want to modify this later - requireEqualSamples(t, l.String(), expOOOSamples, actOOOSamples, true) + requireEqualSamples(t, l.String(), expOOOSamples, actOOOSamples, requireEqualSamplesIgnoreCounterResets) require.NoError(t, h.Close()) } diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 3cd00729a..4e199c5bd 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -118,6 +118,8 @@ type symbolCacheEntry struct { type PostingsEncoder func(*encoding.Encbuf, []uint32) error +type PostingsDecoder func(encoding.Decbuf) (int, Postings, error) + // Writer implements the IndexWriter interface for the standard // serialization format. type Writer struct { @@ -438,7 +440,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... return err } if labels.Compare(lset, w.lastSeries) <= 0 { - return fmt.Errorf("out-of-order series added with label set %q", lset) + return fmt.Errorf("out-of-order series added with label set %q, last label set %q", lset, w.lastSeries) } if ref < w.lastSeriesRef && !w.lastSeries.IsEmpty() { @@ -1157,17 +1159,17 @@ func (b realByteSlice) Sub(start, end int) ByteSlice { // NewReader returns a new index reader on the given byte slice. It automatically // handles different format versions. -func NewReader(b ByteSlice) (*Reader, error) { - return newReader(b, io.NopCloser(nil)) +func NewReader(b ByteSlice, decoder PostingsDecoder) (*Reader, error) { + return newReader(b, io.NopCloser(nil), decoder) } // NewFileReader returns a new index reader against the given index file. -func NewFileReader(path string) (*Reader, error) { +func NewFileReader(path string, decoder PostingsDecoder) (*Reader, error) { f, err := fileutil.OpenMmapFile(path) if err != nil { return nil, err } - r, err := newReader(realByteSlice(f.Bytes()), f) + r, err := newReader(realByteSlice(f.Bytes()), f, decoder) if err != nil { return nil, tsdb_errors.NewMulti( err, @@ -1178,7 +1180,7 @@ func NewFileReader(path string) (*Reader, error) { return r, nil } -func newReader(b ByteSlice, c io.Closer) (*Reader, error) { +func newReader(b ByteSlice, c io.Closer, postingsDecoder PostingsDecoder) (*Reader, error) { r := &Reader{ b: b, c: c, @@ -1277,7 +1279,7 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { r.nameSymbols[off] = k } - r.dec = &Decoder{LookupSymbol: r.lookupSymbol} + r.dec = &Decoder{LookupSymbol: r.lookupSymbol, DecodePostings: postingsDecoder} return r, nil } @@ -1706,7 +1708,7 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P } // Read from the postings table. d := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) - _, p, err := r.dec.Postings(d.Get()) + _, p, err := r.dec.DecodePostings(d) if err != nil { return nil, fmt.Errorf("decode postings: %w", err) } @@ -1749,7 +1751,7 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P if val == value { // Read from the postings table. d2 := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) - _, p, err := r.dec.Postings(d2.Get()) + _, p, err := r.dec.DecodePostings(d2) if err != nil { return false, fmt.Errorf("decode postings: %w", err) } @@ -1790,7 +1792,7 @@ func (r *Reader) PostingsForLabelMatching(ctx context.Context, name string, matc if match(val) { // We want this postings iterator since the value is a match postingsDec := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) - _, p, err := r.dec.PostingsFromDecbuf(postingsDec) + _, p, err := r.dec.DecodePostings(postingsDec) if err != nil { return false, fmt.Errorf("decode postings: %w", err) } @@ -1823,7 +1825,7 @@ func (r *Reader) postingsForLabelMatchingV1(ctx context.Context, name string, ma // Read from the postings table. d := encoding.NewDecbufAt(r.b, int(offset), castagnoliTable) - _, p, err := r.dec.PostingsFromDecbuf(d) + _, p, err := r.dec.DecodePostings(d) if err != nil { return ErrPostings(fmt.Errorf("decode postings: %w", err)) } @@ -1918,17 +1920,12 @@ func (s stringListIter) Err() error { return nil } // It currently does not contain decoding methods for all entry types but can be extended // by them if there's demand. type Decoder struct { - LookupSymbol func(context.Context, uint32) (string, error) + LookupSymbol func(context.Context, uint32) (string, error) + DecodePostings PostingsDecoder } -// Postings returns a postings list for b and its number of elements. -func (dec *Decoder) Postings(b []byte) (int, Postings, error) { - d := encoding.Decbuf{B: b} - return dec.PostingsFromDecbuf(d) -} - -// PostingsFromDecbuf returns a postings list for d and its number of elements. -func (dec *Decoder) PostingsFromDecbuf(d encoding.Decbuf) (int, Postings, error) { +// DecodePostingsRaw returns a postings list for d and its number of elements. +func DecodePostingsRaw(d encoding.Decbuf) (int, Postings, error) { n := d.Be32int() l := d.Get() if d.Err() != nil { diff --git a/tsdb/index/index_test.go b/tsdb/index/index_test.go index d81dd8696..0d330cfcb 100644 --- a/tsdb/index/index_test.go +++ b/tsdb/index/index_test.go @@ -146,7 +146,7 @@ func TestIndexRW_Create_Open(t *testing.T) { require.NoError(t, err) require.NoError(t, iw.Close()) - ir, err := NewFileReader(fn) + ir, err := NewFileReader(fn, DecodePostingsRaw) require.NoError(t, err) require.NoError(t, ir.Close()) @@ -157,7 +157,7 @@ func TestIndexRW_Create_Open(t *testing.T) { require.NoError(t, err) f.Close() - _, err = NewFileReader(dir) + _, err = NewFileReader(dir, DecodePostingsRaw) require.Error(t, err) } @@ -218,7 +218,7 @@ func TestIndexRW_Postings(t *testing.T) { }, labelIndices) t.Run("ShardedPostings()", func(t *testing.T) { - ir, err := NewFileReader(fn) + ir, err := NewFileReader(fn, DecodePostingsRaw) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, ir.Close()) @@ -469,7 +469,7 @@ func TestDecbufUvarintWithInvalidBuffer(t *testing.T) { func TestReaderWithInvalidBuffer(t *testing.T) { b := realByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81}) - _, err := NewReader(b) + _, err := NewReader(b, DecodePostingsRaw) require.Error(t, err) } @@ -481,7 +481,7 @@ func TestNewFileReaderErrorNoOpenFiles(t *testing.T) { err := os.WriteFile(idxName, []byte("corrupted contents"), 0o666) require.NoError(t, err) - _, err = NewFileReader(idxName) + _, err = NewFileReader(idxName, DecodePostingsRaw) require.Error(t, err) // dir.Close will fail on Win if idxName fd is not closed on error path. @@ -560,7 +560,8 @@ func BenchmarkReader_ShardedPostings(b *testing.B) { } func TestDecoder_Postings_WrongInput(t *testing.T) { - _, _, err := (&Decoder{}).Postings([]byte("the cake is a lie")) + d := encoding.Decbuf{B: []byte("the cake is a lie")} + _, _, err := (&Decoder{DecodePostings: DecodePostingsRaw}).DecodePostings(d) require.Error(t, err) } @@ -690,7 +691,7 @@ func createFileReader(ctx context.Context, tb testing.TB, input indexWriterSerie } require.NoError(tb, iw.Close()) - ir, err := NewFileReader(fn) + ir, err := NewFileReader(fn, DecodePostingsRaw) require.NoError(tb, err) tb.Cleanup(func() { require.NoError(tb, ir.Close()) diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index 7bc5629ac..d7b497e61 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -24,9 +24,9 @@ import ( "sort" "strings" "sync" + "time" "github.com/bboreham/go-loser" - "github.com/cespare/xxhash/v2" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" @@ -293,76 +293,52 @@ func (p *MemPostings) EnsureOrder(numberOfConcurrentProcesses int) { func (p *MemPostings) Delete(deleted map[storage.SeriesRef]struct{}, affected map[labels.Label]struct{}) { p.mtx.Lock() defer p.mtx.Unlock() - if len(p.m) == 0 || len(deleted) == 0 { - return + + process := func(l labels.Label) { + orig := p.m[l.Name][l.Value] + repl := make([]storage.SeriesRef, 0, len(orig)) + for _, id := range orig { + if _, ok := deleted[id]; !ok { + repl = append(repl, id) + } + } + if len(repl) > 0 { + p.m[l.Name][l.Value] = repl + } else { + delete(p.m[l.Name], l.Value) + // Delete the key if we removed all values. + if len(p.m[l.Name]) == 0 { + delete(p.m, l.Name) + } + } } - // Deleting label names mutates p.m map, so it should be done from a single goroutine after nobody else is reading it. - deleteLabelNames := make(chan string, len(p.m)) - - process, wait := processWithBoundedParallelismAndConsistentWorkers( - runtime.GOMAXPROCS(0), - func(l labels.Label) uint64 { return xxhash.Sum64String(l.Name) }, - func(l labels.Label) { - orig := p.m[l.Name][l.Value] - repl := make([]storage.SeriesRef, 0, len(orig)) - for _, id := range orig { - if _, ok := deleted[id]; !ok { - repl = append(repl, id) - } - } - if len(repl) > 0 { - p.m[l.Name][l.Value] = repl - } else { - delete(p.m[l.Name], l.Value) - if len(p.m[l.Name]) == 0 { - // Delete the key if we removed all values. - deleteLabelNames <- l.Name - } - } - }, - ) - + i := 0 for l := range affected { + i++ process(l) + + // From time to time we want some readers to go through and read their postings. + // It takes around 50ms to process a 1K series batch, and 120ms to process a 10K series batch (local benchmarks on an M3). + // Note that a read query will most likely want to read multiple postings lists, say 5, 10 or 20 (depending on the number of matchers) + // And that read query will most likely evaluate only one of those matchers before we unpause here, so we want to pause often. + if i%512 == 0 { + p.mtx.Unlock() + // While it's tempting to just do a `time.Sleep(time.Millisecond)` here, + // it wouldn't ensure use that readers actually were able to get the read lock, + // because if there are writes waiting on same mutex, readers won't be able to get it. + // So we just grab one RLock ourselves. + p.mtx.RLock() + // We shouldn't wait here, because we would be blocking a potential write for no reason. + // Note that if there's a writer waiting for us to unlock, no reader will be able to get the read lock. + p.mtx.RUnlock() //nolint:staticcheck // SA2001: this is an intentionally empty critical section. + // Now we can wait a little bit just to increase the chance of a reader getting the lock. + // If we were deleting 100M series here, pausing every 512 with 1ms sleeps would be an extra of 200s, which is negligible. + time.Sleep(time.Millisecond) + p.mtx.Lock() + } } process(allPostingsKey) - wait() - - // Close deleteLabelNames channel and delete the label names requested. - close(deleteLabelNames) - for name := range deleteLabelNames { - delete(p.m, name) - } -} - -// processWithBoundedParallelismAndConsistentWorkers will call f() with bounded parallelism, -// making sure that elements with same hash(T) will always be processed by the same worker. -// Call process() to add more jobs to process, and once finished adding, call wait() to ensure that all jobs are processed. -func processWithBoundedParallelismAndConsistentWorkers[T any](workers int, hash func(T) uint64, f func(T)) (process func(T), wait func()) { - wg := &sync.WaitGroup{} - jobs := make([]chan T, workers) - for i := 0; i < workers; i++ { - wg.Add(1) - jobs[i] = make(chan T, 128) - go func(jobs <-chan T) { - defer wg.Done() - for l := range jobs { - f(l) - } - }(jobs[i]) - } - - process = func(job T) { - jobs[hash(job)%uint64(workers)] <- job - } - wait = func() { - for i := range jobs { - close(jobs[i]) - } - wg.Wait() - } - return process, wait } // Iter calls f for each postings list. It aborts if f returns an error and returns it. @@ -392,14 +368,13 @@ func (p *MemPostings) Add(id storage.SeriesRef, lset labels.Labels) { p.mtx.Unlock() } -func appendWithExponentialGrowth[T any](a []T, v T) (_ []T, copied bool) { +func appendWithExponentialGrowth[T any](a []T, v T) []T { if cap(a) < len(a)+1 { newList := make([]T, len(a), len(a)*2+1) copy(newList, a) a = newList - copied = true } - return append(a, v), copied + return append(a, v) } func (p *MemPostings) addFor(id storage.SeriesRef, l labels.Label) { @@ -408,26 +383,16 @@ func (p *MemPostings) addFor(id storage.SeriesRef, l labels.Label) { nm = map[string][]storage.SeriesRef{} p.m[l.Name] = nm } - list, copied := appendWithExponentialGrowth(nm[l.Value], id) + list := appendWithExponentialGrowth(nm[l.Value], id) nm[l.Value] = list - // Return if it shouldn't be ordered, if it only has one element or if it's already ordered. - // The invariant is that the first n-1 items in the list are already sorted. - if !p.ordered || len(list) == 1 || list[len(list)-1] >= list[len(list)-2] { + if !p.ordered { return } - - if !copied { - // We have appended to the existing slice, - // and readers may already have a copy of this postings slice, - // so we need to copy it before sorting. - old := list - list = make([]storage.SeriesRef, len(old), cap(old)) - copy(list, old) - nm[l.Value] = list - } - - // Repair order violations. + // There is no guarantee that no higher ID was inserted before as they may + // be generated independently before adding them to postings. + // We repair order violations on insert. The invariant is that the first n-1 + // items in the list are already sorted. for i := len(list) - 1; i >= 1; i-- { if list[i] >= list[i-1] { break diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index 8ee9b9943..96c9ed124 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -973,69 +973,37 @@ func TestMemPostingsStats(t *testing.T) { } func TestMemPostings_Delete(t *testing.T) { - t.Run("some postings", func(t *testing.T) { - p := NewMemPostings() - p.Add(1, labels.FromStrings("lbl1", "a")) - p.Add(2, labels.FromStrings("lbl1", "b")) - p.Add(3, labels.FromStrings("lbl2", "a")) + p := NewMemPostings() + p.Add(1, labels.FromStrings("lbl1", "a")) + p.Add(2, labels.FromStrings("lbl1", "b")) + p.Add(3, labels.FromStrings("lbl2", "a")) - before := p.Get(allPostingsKey.Name, allPostingsKey.Value) - deletedRefs := map[storage.SeriesRef]struct{}{ - 2: {}, - } - affectedLabels := map[labels.Label]struct{}{ - {Name: "lbl1", Value: "b"}: {}, - } - p.Delete(deletedRefs, affectedLabels) - after := p.Get(allPostingsKey.Name, allPostingsKey.Value) + before := p.Get(allPostingsKey.Name, allPostingsKey.Value) + deletedRefs := map[storage.SeriesRef]struct{}{ + 2: {}, + } + affectedLabels := map[labels.Label]struct{}{ + {Name: "lbl1", Value: "b"}: {}, + } + p.Delete(deletedRefs, affectedLabels) + after := p.Get(allPostingsKey.Name, allPostingsKey.Value) - // Make sure postings gotten before the delete have the old data when - // iterated over. - expanded, err := ExpandPostings(before) - require.NoError(t, err) - require.Equal(t, []storage.SeriesRef{1, 2, 3}, expanded) + // Make sure postings gotten before the delete have the old data when + // iterated over. + expanded, err := ExpandPostings(before) + require.NoError(t, err) + require.Equal(t, []storage.SeriesRef{1, 2, 3}, expanded) - // Make sure postings gotten after the delete have the new data when - // iterated over. - expanded, err = ExpandPostings(after) - require.NoError(t, err) - require.Equal(t, []storage.SeriesRef{1, 3}, expanded) + // Make sure postings gotten after the delete have the new data when + // iterated over. + expanded, err = ExpandPostings(after) + require.NoError(t, err) + require.Equal(t, []storage.SeriesRef{1, 3}, expanded) - deleted := p.Get("lbl1", "b") - expanded, err = ExpandPostings(deleted) - require.NoError(t, err) - require.Empty(t, expanded, "expected empty postings, got %v", expanded) - }) - - t.Run("all postings", func(t *testing.T) { - p := NewMemPostings() - p.Add(1, labels.FromStrings("lbl1", "a")) - p.Add(2, labels.FromStrings("lbl1", "b")) - p.Add(3, labels.FromStrings("lbl2", "a")) - - deletedRefs := map[storage.SeriesRef]struct{}{1: {}, 2: {}, 3: {}} - affectedLabels := map[labels.Label]struct{}{ - {Name: "lbl1", Value: "a"}: {}, - {Name: "lbl1", Value: "b"}: {}, - {Name: "lbl1", Value: "c"}: {}, - } - p.Delete(deletedRefs, affectedLabels) - after := p.Get(allPostingsKey.Name, allPostingsKey.Value) - expanded, err := ExpandPostings(after) - require.NoError(t, err) - require.Empty(t, expanded) - }) - - t.Run("nothing on empty mempostings", func(t *testing.T) { - p := NewMemPostings() - deletedRefs := map[storage.SeriesRef]struct{}{} - affectedLabels := map[labels.Label]struct{}{} - p.Delete(deletedRefs, affectedLabels) - after := p.Get(allPostingsKey.Name, allPostingsKey.Value) - expanded, err := ExpandPostings(after) - require.NoError(t, err) - require.Empty(t, expanded) - }) + deleted := p.Get("lbl1", "b") + expanded, err = ExpandPostings(deleted) + require.NoError(t, err) + require.Empty(t, expanded, "expected empty postings, got %v", expanded) } // BenchmarkMemPostings_Delete is quite heavy, so consider running it with @@ -1057,7 +1025,7 @@ func BenchmarkMemPostings_Delete(b *testing.B) { return s } - const total = 2e6 + const total = 1e6 allSeries := [total]labels.Labels{} nameValues := make([]string, 0, 100) for i := 0; i < total; i++ { @@ -1507,58 +1475,3 @@ func TestMemPostings_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { require.Error(t, p.Err()) require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result. } - -func TestMemPostings_Unordered_Add_Get(t *testing.T) { - mp := NewMemPostings() - for ref := storage.SeriesRef(1); ref < 8; ref += 2 { - // First, add next series. - next := ref + 1 - mp.Add(next, labels.FromStrings(labels.MetricName, "test", "series", strconv.Itoa(int(next)))) - nextPostings := mp.Get(labels.MetricName, "test") - - // Now add current ref. - mp.Add(ref, labels.FromStrings(labels.MetricName, "test", "series", strconv.Itoa(int(ref)))) - - // Next postings should still reference the next series. - nextExpanded, err := ExpandPostings(nextPostings) - require.NoError(t, err) - require.Len(t, nextExpanded, int(ref)) - require.Equal(t, next, nextExpanded[len(nextExpanded)-1]) - } -} - -func TestMemPostings_Concurrent_Add_Get(t *testing.T) { - refs := make(chan storage.SeriesRef) - wg := sync.WaitGroup{} - wg.Add(1) - t.Cleanup(wg.Wait) - t.Cleanup(func() { close(refs) }) - - mp := NewMemPostings() - go func() { - defer wg.Done() - for ref := range refs { - mp.Add(ref, labels.FromStrings(labels.MetricName, "test", "series", strconv.Itoa(int(ref)))) - p := mp.Get(labels.MetricName, "test") - - _, err := ExpandPostings(p) - if err != nil { - t.Errorf("unexpected error: %s", err) - } - } - }() - - for ref := storage.SeriesRef(1); ref < 8; ref += 2 { - // Add next ref in another goroutine so they would race. - refs <- ref + 1 - // Add current ref here - mp.Add(ref, labels.FromStrings(labels.MetricName, "test", "series", strconv.Itoa(int(ref)))) - - // We don't read the value of the postings here, - // this is tested in TestMemPostings_Unordered_Add_Get where it's easier to achieve the determinism. - // This test just checks that there's no data race. - p := mp.Get(labels.MetricName, "test") - _, err := ExpandPostings(p) - require.NoError(t, err) - } -} diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go index 8d1527e05..bc1cb67d1 100644 --- a/tsdb/ooo_head_read_test.go +++ b/tsdb/ooo_head_read_test.go @@ -509,7 +509,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { Ref: 0x1800000, Chunk: chunkenc.Chunk(nil), MinTime: 100, MaxTime: 300, }) require.Nil(t, iterable) - require.Equal(t, err, fmt.Errorf("not found")) + require.EqualError(t, err, "not found") require.Nil(t, c) }) @@ -878,7 +878,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { } resultSamples, err := storage.ExpandSamples(it, nil) require.NoError(t, err) - requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, true) + requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, requireEqualSamplesIgnoreCounterResets) } }) } @@ -1054,7 +1054,7 @@ func testOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding( it := iterable.Iterator(nil) resultSamples, err := storage.ExpandSamples(it, nil) require.NoError(t, err) - requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, true) + requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, requireEqualSamplesIgnoreCounterResets) } }) } diff --git a/tsdb/ooo_head_test.go b/tsdb/ooo_head_test.go index b9badfea2..b1641e29b 100644 --- a/tsdb/ooo_head_test.go +++ b/tsdb/ooo_head_test.go @@ -27,7 +27,6 @@ import ( const testMaxSize int = 32 -// Formulas chosen to make testing easy. // Formulas chosen to make testing easy. func valEven(pos int) int64 { return int64(pos*2 + 2) } // s[0]=2, s[1]=4, s[2]=6, ..., s[31]=64 - Predictable pre-existing values func valOdd(pos int) int64 { return int64(pos*2 + 1) } // s[0]=1, s[1]=3, s[2]=5, ..., s[31]=63 - New values will interject at chosen position because they sort before the pre-existing vals. diff --git a/tsdb/querier.go b/tsdb/querier.go index 1083cbba0..f741c5e28 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -193,6 +193,11 @@ func selectChunkSeriesSet(ctx context.Context, sortSeries bool, hints *storage.S // PostingsForMatchers assembles a single postings iterator against the index reader // based on the given matchers. The resulting postings are not ordered by series. func PostingsForMatchers(ctx context.Context, ix IndexReader, ms ...*labels.Matcher) (index.Postings, error) { + if len(ms) == 1 && ms[0].Name == "" && ms[0].Value == "" { + k, v := index.AllPostingsKey() + return ix.Postings(ctx, k, v) + } + var its, notIts []index.Postings // See which label must be non-empty. // Optimization for case like {l=~".", l!="1"}. @@ -247,13 +252,10 @@ func PostingsForMatchers(ctx context.Context, ix IndexReader, ms ...*labels.Matc return nil, ctx.Err() } switch { - case m.Name == "" && m.Value == "": // Special-case for AllPostings, used in tests at least. - k, v := index.AllPostingsKey() - allPostings, err := ix.Postings(ctx, k, v) - if err != nil { - return nil, err - } - its = append(its, allPostings) + case m.Name == "" && m.Value == "": + // We already handled the case at the top of the function, + // and it is unexpected to get all postings again here. + return nil, errors.New("unexpected all postings") case m.Type == labels.MatchRegexp && m.Value == ".*": // .* regexp matches any string: do nothing. case m.Type == labels.MatchNotRegexp && m.Value == ".*": @@ -1022,9 +1024,9 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool { if newChunk != nil { if !recoded { p.chunksFromIterable = append(p.chunksFromIterable, chunks.Meta{Chunk: currentChunk, MinTime: cmint, MaxTime: cmaxt}) + cmint = t } currentChunk = newChunk - cmint = t } cmaxt = t diff --git a/tsdb/querier_bench_test.go b/tsdb/querier_bench_test.go index 33dca1284..8b8f092a8 100644 --- a/tsdb/querier_bench_test.go +++ b/tsdb/querier_bench_test.go @@ -75,7 +75,7 @@ func BenchmarkQuerier(b *testing.B) { b.Run("Block", func(b *testing.B) { blockdir := createBlockFromHead(b, b.TempDir(), h) - block, err := OpenBlock(nil, blockdir, nil) + block, err := OpenBlock(nil, blockdir, nil, nil) require.NoError(b, err) defer func() { require.NoError(b, block.Close()) @@ -315,7 +315,7 @@ func BenchmarkQuerierSelect(b *testing.B) { tmpdir := b.TempDir() blockdir := createBlockFromHead(b, tmpdir, h) - block, err := OpenBlock(nil, blockdir, nil) + block, err := OpenBlock(nil, blockdir, nil, nil) require.NoError(b, err) defer func() { require.NoError(b, block.Close()) diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 77772937a..e97aea2fd 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -2443,7 +2443,7 @@ func BenchmarkQueryIterator(b *testing.B) { } else { generatedSeries = populateSeries(prefilledLabels, mint, maxt) } - block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil) + block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil, nil) require.NoError(b, err) blocks = append(blocks, block) defer block.Close() @@ -2506,7 +2506,7 @@ func BenchmarkQuerySeek(b *testing.B) { } else { generatedSeries = populateSeries(prefilledLabels, mint, maxt) } - block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil) + block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil, nil) require.NoError(b, err) blocks = append(blocks, block) defer block.Close() @@ -2641,7 +2641,7 @@ func BenchmarkSetMatcher(b *testing.B) { } else { generatedSeries = populateSeries(prefilledLabels, mint, maxt) } - block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil) + block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil, nil) require.NoError(b, err) blocks = append(blocks, block) defer block.Close() @@ -3209,7 +3209,7 @@ func BenchmarkQueries(b *testing.B) { qs := make([]storage.Querier, 0, 10) for x := 0; x <= 10; x++ { - block, err := OpenBlock(nil, createBlock(b, dir, series), nil) + block, err := OpenBlock(nil, createBlock(b, dir, series), nil, nil) require.NoError(b, err) q, err := NewBlockQuerier(block, 1, nSamples) require.NoError(b, err) @@ -3324,7 +3324,7 @@ func (m mockMatcherIndex) LabelNames(context.Context, ...*labels.Matcher) ([]str } func (m mockMatcherIndex) PostingsForLabelMatching(context.Context, string, func(string) bool) index.Postings { - return index.ErrPostings(fmt.Errorf("PostingsForLabelMatching called")) + return index.ErrPostings(errors.New("PostingsForLabelMatching called")) } func TestPostingsForMatcher(t *testing.T) { @@ -3787,3 +3787,35 @@ func (m mockReaderOfLabels) Series(storage.SeriesRef, *labels.ScratchBuilder, *[ func (m mockReaderOfLabels) Symbols() index.StringIter { panic("Series called") } + +// TestMergeQuerierConcurrentSelectMatchers reproduces the data race bug from +// https://github.com/prometheus/prometheus/issues/14723, when one of the queriers (blockQuerier in this case) +// alters the passed matchers. +func TestMergeQuerierConcurrentSelectMatchers(t *testing.T) { + block, err := OpenBlock(nil, createBlock(t, t.TempDir(), genSeries(1, 1, 0, 1)), nil, nil) + require.NoError(t, err) + defer func() { + require.NoError(t, block.Close()) + }() + p, err := NewBlockQuerier(block, 0, 1) + require.NoError(t, err) + + // A secondary querier is required to enable concurrent select; a blockQuerier is used for simplicity. + s, err := NewBlockQuerier(block, 0, 1) + require.NoError(t, err) + + originalMatchers := []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "baz", ".*"), + labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"), + } + matchers := append([]*labels.Matcher{}, originalMatchers...) + + mergedQuerier := storage.NewMergeQuerier([]storage.Querier{p}, []storage.Querier{s}, storage.ChainedSeriesMerge) + defer func() { + require.NoError(t, mergedQuerier.Close()) + }() + + mergedQuerier.Select(context.Background(), false, nil, matchers...) + + require.Equal(t, originalMatchers, matchers) +} diff --git a/tsdb/repair_test.go b/tsdb/repair_test.go index 8a70e05f3..8a192c4f7 100644 --- a/tsdb/repair_test.go +++ b/tsdb/repair_test.go @@ -79,7 +79,7 @@ func TestRepairBadIndexVersion(t *testing.T) { require.NoError(t, os.MkdirAll(filepath.Join(tmpDbDir, "chunks"), 0o777)) // Read current index to check integrity. - r, err := index.NewFileReader(filepath.Join(tmpDbDir, indexFilename)) + r, err := index.NewFileReader(filepath.Join(tmpDbDir, indexFilename), index.DecodePostingsRaw) require.NoError(t, err) p, err := r.Postings(ctx, "b", "1") require.NoError(t, err) @@ -97,7 +97,7 @@ func TestRepairBadIndexVersion(t *testing.T) { require.NoError(t, err) db.Close() - r, err = index.NewFileReader(filepath.Join(tmpDbDir, indexFilename)) + r, err = index.NewFileReader(filepath.Join(tmpDbDir, indexFilename), index.DecodePostingsRaw) require.NoError(t, err) defer r.Close() p, err = r.Postings(ctx, "b", "1") diff --git a/tsdb/testutil.go b/tsdb/testutil.go index ab6aab79f..03587f4e2 100644 --- a/tsdb/testutil.go +++ b/tsdb/testutil.go @@ -111,7 +111,7 @@ func requireEqualSeries(t *testing.T, expected, actual map[string][]chunks.Sampl for name, expectedItem := range expected { actualItem, ok := actual[name] require.True(t, ok, "Expected series %s not found", name) - requireEqualSamples(t, name, expectedItem, actualItem, ignoreCounterResets) + requireEqualSamples(t, name, expectedItem, actualItem, requireEqualSamplesIgnoreCounterResets) } for name := range actual { _, ok := expected[name] @@ -126,7 +126,28 @@ func requireEqualOOOSamples(t *testing.T, expectedSamples int, db *DB) { "number of ooo appended samples mismatch") } -func requireEqualSamples(t *testing.T, name string, expected, actual []chunks.Sample, ignoreCounterResets bool) { +type requireEqualSamplesOption int + +const ( + requireEqualSamplesNoOption requireEqualSamplesOption = iota + requireEqualSamplesIgnoreCounterResets + requireEqualSamplesInUseBucketCompare +) + +func requireEqualSamples(t *testing.T, name string, expected, actual []chunks.Sample, options ...requireEqualSamplesOption) { + var ( + ignoreCounterResets bool + inUseBucketCompare bool + ) + for _, option := range options { + switch option { + case requireEqualSamplesIgnoreCounterResets: + ignoreCounterResets = true + case requireEqualSamplesInUseBucketCompare: + inUseBucketCompare = true + } + } + require.Equal(t, len(expected), len(actual), "Length not equal to expected for %s", name) for i, s := range expected { expectedSample := s @@ -144,6 +165,10 @@ func requireEqualSamples(t *testing.T, name string, expected, actual []chunks.Sa } else { require.Equal(t, expectedHist.CounterResetHint, actualHist.CounterResetHint, "Sample header doesn't match for %s[%d] at ts %d, expected: %s, actual: %s", name, i, expectedSample.T(), counterResetAsString(expectedHist.CounterResetHint), counterResetAsString(actualHist.CounterResetHint)) } + if inUseBucketCompare { + expectedSample.H().Compact(0) + actualSample.H().Compact(0) + } require.Equal(t, expectedHist, actualHist, "Sample doesn't match for %s[%d] at ts %d", name, i, expectedSample.T()) } case s.FH() != nil: @@ -156,6 +181,10 @@ func requireEqualSamples(t *testing.T, name string, expected, actual []chunks.Sa } else { require.Equal(t, expectedHist.CounterResetHint, actualHist.CounterResetHint, "Sample header doesn't match for %s[%d] at ts %d, expected: %s, actual: %s", name, i, expectedSample.T(), counterResetAsString(expectedHist.CounterResetHint), counterResetAsString(actualHist.CounterResetHint)) } + if inUseBucketCompare { + expectedSample.FH().Compact(0) + actualSample.FH().Compact(0) + } require.Equal(t, expectedHist, actualHist, "Sample doesn't match for %s[%d] at ts %d", name, i, expectedSample.T()) } default: diff --git a/tsdb/tsdbblockutil.go b/tsdb/tsdbblockutil.go index b49757223..af2348019 100644 --- a/tsdb/tsdbblockutil.go +++ b/tsdb/tsdbblockutil.go @@ -15,6 +15,7 @@ package tsdb import ( "context" + "errors" "fmt" "log/slog" "path/filepath" @@ -23,7 +24,7 @@ import ( "github.com/prometheus/prometheus/tsdb/chunkenc" ) -var ErrInvalidTimes = fmt.Errorf("max time is lesser than min time") +var ErrInvalidTimes = errors.New("max time is lesser than min time") // CreateBlock creates a chunkrange block from the samples passed to it, and writes it to disk. func CreateBlock(series []storage.Series, dir string, chunkRange int64, logger *slog.Logger) (string, error) { diff --git a/util/convertnhcb/convertnhcb.go b/util/convertnhcb/convertnhcb.go index acbb4bec8..ee5bcb72d 100644 --- a/util/convertnhcb/convertnhcb.go +++ b/util/convertnhcb/convertnhcb.go @@ -14,158 +14,222 @@ package convertnhcb import ( + "errors" "fmt" "math" "sort" "strings" - "github.com/grafana/regexp" - "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" ) -var histogramNameSuffixReplacements = []struct { - pattern *regexp.Regexp - repl string -}{ - { - pattern: regexp.MustCompile(`_bucket$`), - repl: "", - }, - { - pattern: regexp.MustCompile(`_sum$`), - repl: "", - }, - { - pattern: regexp.MustCompile(`_count$`), - repl: "", - }, +var ( + errNegativeBucketCount = errors.New("bucket count must be non-negative") + errNegativeCount = errors.New("count must be non-negative") + errCountMismatch = errors.New("count mismatch") + errCountNotCumulative = errors.New("count is not cumulative") +) + +type tempHistogramBucket struct { + le float64 + count float64 } // TempHistogram is used to collect information about classic histogram // samples incrementally before creating a histogram.Histogram or // histogram.FloatHistogram based on the values collected. type TempHistogram struct { - BucketCounts map[float64]float64 - Count float64 - Sum float64 - HasFloat bool + buckets []tempHistogramBucket + count float64 + sum float64 + err error + hasCount bool } // NewTempHistogram creates a new TempHistogram to // collect information about classic histogram samples. func NewTempHistogram() TempHistogram { return TempHistogram{ - BucketCounts: map[float64]float64{}, + buckets: make([]tempHistogramBucket, 0, 10), } } -func (h TempHistogram) getIntBucketCounts() (map[float64]int64, error) { - bucketCounts := map[float64]int64{} - for le, count := range h.BucketCounts { - intCount := int64(math.Round(count)) - if float64(intCount) != count { - return nil, fmt.Errorf("bucket count %f for le %g is not an integer", count, le) +func (h TempHistogram) Err() error { + return h.err +} + +func (h *TempHistogram) Reset() { + h.buckets = h.buckets[:0] + h.count = 0 + h.sum = 0 + h.err = nil + h.hasCount = false +} + +func (h *TempHistogram) SetBucketCount(boundary, count float64) error { + if h.err != nil { + return h.err + } + if count < 0 { + h.err = fmt.Errorf("%w: le=%g, count=%g", errNegativeBucketCount, boundary, count) + return h.err + } + // Assume that the elements are added in order. + switch { + case len(h.buckets) == 0: + h.buckets = append(h.buckets, tempHistogramBucket{le: boundary, count: count}) + case h.buckets[len(h.buckets)-1].le < boundary: + // Happy case is "<". + if count < h.buckets[len(h.buckets)-1].count { + h.err = fmt.Errorf("%w: %g < %g", errCountNotCumulative, count, h.buckets[len(h.buckets)-1].count) + return h.err } - bucketCounts[le] = intCount - } - return bucketCounts, nil -} - -// ProcessUpperBoundsAndCreateBaseHistogram prepares an integer native -// histogram with custom buckets based on the provided upper bounds. -// Everything is set except the bucket counts. -// The sorted upper bounds are also returned. -func ProcessUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64, needsDedup bool) ([]float64, *histogram.Histogram) { - sort.Float64s(upperBounds0) - var upperBounds []float64 - if needsDedup { - upperBounds = make([]float64, 0, len(upperBounds0)) - prevLE := math.Inf(-1) - for _, le := range upperBounds0 { - if le != prevLE { - upperBounds = append(upperBounds, le) - prevLE = le - } + h.buckets = append(h.buckets, tempHistogramBucket{le: boundary, count: count}) + case h.buckets[len(h.buckets)-1].le == boundary: + // Ignore this, as it is a duplicate sample. + default: + // Find the correct position to insert. + i := sort.Search(len(h.buckets), func(i int) bool { + return h.buckets[i].le >= boundary + }) + if h.buckets[i].le == boundary { + // Ignore this, as it is a duplicate sample. + return nil } - } else { - upperBounds = upperBounds0 - } - var customBounds []float64 - if upperBounds[len(upperBounds)-1] == math.Inf(1) { - customBounds = upperBounds[:len(upperBounds)-1] - } else { - customBounds = upperBounds - } - return upperBounds, &histogram.Histogram{ - Count: 0, - Sum: 0, - Schema: histogram.CustomBucketsSchema, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: uint32(len(upperBounds))}, - }, - PositiveBuckets: make([]int64, len(upperBounds)), - CustomValues: customBounds, - } -} - -// NewHistogram fills the bucket counts in the provided histogram.Histogram -// or histogram.FloatHistogram based on the provided temporary histogram and -// upper bounds. -func NewHistogram(histogram TempHistogram, upperBounds []float64, hBase *histogram.Histogram, fhBase *histogram.FloatHistogram) (*histogram.Histogram, *histogram.FloatHistogram) { - intBucketCounts, err := histogram.getIntBucketCounts() - if err != nil { - return nil, newFloatHistogram(histogram, upperBounds, histogram.BucketCounts, fhBase) - } - return newIntegerHistogram(histogram, upperBounds, intBucketCounts, hBase), nil -} - -func newIntegerHistogram(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]int64, hBase *histogram.Histogram) *histogram.Histogram { - h := hBase.Copy() - absBucketCounts := make([]int64, len(h.PositiveBuckets)) - var prevCount, total int64 - for i, le := range upperBounds { - currCount, exists := bucketCounts[le] - if !exists { - currCount = 0 + if i > 0 && count < h.buckets[i-1].count { + h.err = fmt.Errorf("%w: %g < %g", errCountNotCumulative, count, h.buckets[i-1].count) + return h.err } - count := currCount - prevCount - absBucketCounts[i] = count - total += count - prevCount = currCount + if count > h.buckets[i].count { + h.err = fmt.Errorf("%w: %g > %g", errCountNotCumulative, count, h.buckets[i].count) + return h.err + } + // Insert at the correct position unless duplicate. + h.buckets = append(h.buckets, tempHistogramBucket{}) + copy(h.buckets[i+1:], h.buckets[i:]) + h.buckets[i] = tempHistogramBucket{le: boundary, count: count} } - h.PositiveBuckets[0] = absBucketCounts[0] - for i := 1; i < len(h.PositiveBuckets); i++ { - h.PositiveBuckets[i] = absBucketCounts[i] - absBucketCounts[i-1] - } - h.Sum = histogram.Sum - if histogram.Count != 0 { - total = int64(histogram.Count) - } - h.Count = uint64(total) - return h.Compact(0) + return nil } -func newFloatHistogram(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]float64, fhBase *histogram.FloatHistogram) *histogram.FloatHistogram { - fh := fhBase.Copy() - var prevCount, total float64 - for i, le := range upperBounds { - currCount, exists := bucketCounts[le] - if !exists { - currCount = 0 +func (h *TempHistogram) SetCount(count float64) error { + if h.err != nil { + return h.err + } + if count < 0 { + h.err = fmt.Errorf("%w: count=%g", errNegativeCount, count) + return h.err + } + h.count = count + h.hasCount = true + return nil +} + +func (h *TempHistogram) SetSum(sum float64) error { + if h.err != nil { + return h.err + } + h.sum = sum + return nil +} + +func (h TempHistogram) Convert() (*histogram.Histogram, *histogram.FloatHistogram, error) { + if h.err != nil { + return nil, nil, h.err + } + + if len(h.buckets) == 0 || h.buckets[len(h.buckets)-1].le != math.Inf(1) { + // No +Inf bucket. + if !h.hasCount && len(h.buckets) > 0 { + // No count either, so set count to the last known bucket's count. + h.count = h.buckets[len(h.buckets)-1].count } - count := currCount - prevCount - fh.PositiveBuckets[i] = count - total += count - prevCount = currCount + // Let the last bucket be +Inf with the overall count. + h.buckets = append(h.buckets, tempHistogramBucket{le: math.Inf(1), count: h.count}) } - fh.Sum = histogram.Sum - if histogram.Count != 0 { - total = histogram.Count + + if !h.hasCount { + h.count = h.buckets[len(h.buckets)-1].count + h.hasCount = true } - fh.Count = total - return fh.Compact(0) + + for _, b := range h.buckets { + intCount := int64(math.Round(b.count)) + if b.count != float64(intCount) { + return h.convertToFloatHistogram() + } + } + + intCount := uint64(math.Round(h.count)) + if h.count != float64(intCount) { + return h.convertToFloatHistogram() + } + return h.convertToIntegerHistogram(intCount) +} + +func (h TempHistogram) convertToIntegerHistogram(count uint64) (*histogram.Histogram, *histogram.FloatHistogram, error) { + rh := &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: count, + Sum: h.sum, + PositiveSpans: []histogram.Span{{Length: uint32(len(h.buckets))}}, + PositiveBuckets: make([]int64, len(h.buckets)), + } + + if len(h.buckets) > 1 { + rh.CustomValues = make([]float64, len(h.buckets)-1) // Not storing the last +Inf bucket. + } + + prevCount := int64(0) + prevDelta := int64(0) + for i, b := range h.buckets { + // delta is the actual bucket count as the input is cumulative. + delta := int64(b.count) - prevCount + rh.PositiveBuckets[i] = delta - prevDelta + prevCount = int64(b.count) + prevDelta = delta + if b.le != math.Inf(1) { + rh.CustomValues[i] = b.le + } + } + + if count != uint64(h.buckets[len(h.buckets)-1].count) { + h.err = fmt.Errorf("%w: count=%d != le=%g count=%g", errCountMismatch, count, h.buckets[len(h.buckets)-1].le, h.buckets[len(h.buckets)-1].count) + return nil, nil, h.err + } + + return rh.Compact(2), nil, nil +} + +func (h TempHistogram) convertToFloatHistogram() (*histogram.Histogram, *histogram.FloatHistogram, error) { + rh := &histogram.FloatHistogram{ + Schema: histogram.CustomBucketsSchema, + Count: h.count, + Sum: h.sum, + PositiveSpans: []histogram.Span{{Length: uint32(len(h.buckets))}}, + PositiveBuckets: make([]float64, len(h.buckets)), + } + + if len(h.buckets) > 1 { + rh.CustomValues = make([]float64, len(h.buckets)-1) // Not storing the last +Inf bucket. + } + + prevCount := 0.0 + for i, b := range h.buckets { + rh.PositiveBuckets[i] = b.count - prevCount + prevCount = b.count + if b.le != math.Inf(1) { + rh.CustomValues[i] = b.le + } + } + + if h.count != h.buckets[len(h.buckets)-1].count { + h.err = fmt.Errorf("%w: count=%g != le=%g count=%g", errCountMismatch, h.count, h.buckets[len(h.buckets)-1].le, h.buckets[len(h.buckets)-1].count) + return nil, nil, h.err + } + + return nil, rh.Compact(0), nil } func GetHistogramMetricBase(m labels.Labels, suffix string) labels.Labels { @@ -176,9 +240,18 @@ func GetHistogramMetricBase(m labels.Labels, suffix string) labels.Labels { Labels() } +// GetHistogramMetricBaseName removes the suffixes _bucket, _sum, _count from +// the metric name. We specifically do not remove the _created suffix as that +// should be removed by the caller. func GetHistogramMetricBaseName(s string) string { - for _, rep := range histogramNameSuffixReplacements { - s = rep.pattern.ReplaceAllString(s, rep.repl) + if r, ok := strings.CutSuffix(s, "_bucket"); ok { + return r + } + if r, ok := strings.CutSuffix(s, "_sum"); ok { + return r + } + if r, ok := strings.CutSuffix(s, "_count"); ok { + return r } return s } diff --git a/util/convertnhcb/convertnhcb_test.go b/util/convertnhcb/convertnhcb_test.go new file mode 100644 index 000000000..7486ac18b --- /dev/null +++ b/util/convertnhcb/convertnhcb_test.go @@ -0,0 +1,189 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package convertnhcb + +import ( + "math" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/model/histogram" +) + +func TestNHCBConvert(t *testing.T) { + tests := map[string]struct { + setup func() *TempHistogram + expectedErr error + expectedH *histogram.Histogram + expectedFH *histogram.FloatHistogram + }{ + "empty": { + setup: func() *TempHistogram { + h := NewTempHistogram() + return &h + }, + expectedH: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + PositiveSpans: []histogram.Span{}, + PositiveBuckets: []int64{}, + }, + }, + "sum only": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetSum(1000.25) + return &h + }, + expectedH: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Sum: 1000.25, + PositiveSpans: []histogram.Span{}, + PositiveBuckets: []int64{}, + }, + }, + "single integer bucket": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetSum(1000.25) + h.SetBucketCount(0.5, 1000) + return &h + }, + expectedH: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1000, + Sum: 1000.25, + PositiveSpans: []histogram.Span{{Length: 1}}, + PositiveBuckets: []int64{1000}, + CustomValues: []float64{0.5}, + }, + }, + "single float bucket": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetSum(1000.25) + h.SetBucketCount(0.5, 1337.42) + return &h + }, + expectedFH: &histogram.FloatHistogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1337.42, + Sum: 1000.25, + PositiveSpans: []histogram.Span{{Length: 1}}, + PositiveBuckets: []float64{1337.42}, + CustomValues: []float64{0.5}, + }, + }, + "happy case integer bucket": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetCount(1000) + h.SetSum(1000.25) + h.SetBucketCount(0.5, 50) + h.SetBucketCount(1.0, 950) + h.SetBucketCount(math.Inf(1), 1000) + return &h + }, + expectedH: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1000, + Sum: 1000.25, + PositiveSpans: []histogram.Span{{Length: 3}}, + PositiveBuckets: []int64{50, 850, -850}, + CustomValues: []float64{0.5, 1.0}, + }, + }, + "happy case float bucket": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetCount(1000) + h.SetSum(1000.25) + h.SetBucketCount(0.5, 50) + h.SetBucketCount(1.0, 950.5) + h.SetBucketCount(math.Inf(1), 1000) + return &h + }, + expectedFH: &histogram.FloatHistogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1000, + Sum: 1000.25, + PositiveSpans: []histogram.Span{{Length: 3}}, + PositiveBuckets: []float64{50, 900.5, 49.5}, + CustomValues: []float64{0.5, 1.0}, + }, + }, + "non cumulative bucket": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetCount(1000) + h.SetSum(1000.25) + h.SetBucketCount(0.5, 50) + h.SetBucketCount(1.0, 950) + h.SetBucketCount(math.Inf(1), 900) + return &h + }, + expectedErr: errCountNotCumulative, + }, + "negative count": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetCount(-1000) + h.SetSum(1000.25) + h.SetBucketCount(0.5, 50) + h.SetBucketCount(1.0, 950) + h.SetBucketCount(math.Inf(1), 900) + return &h + }, + expectedErr: errNegativeCount, + }, + "mixed order": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetBucketCount(0.5, 50) + h.SetBucketCount(math.Inf(1), 1000) + h.SetBucketCount(1.0, 950) + h.SetCount(1000) + h.SetSum(1000.25) + return &h + }, + expectedH: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1000, + Sum: 1000.25, + PositiveSpans: []histogram.Span{{Length: 3}}, + PositiveBuckets: []int64{50, 850, -850}, + CustomValues: []float64{0.5, 1.0}, + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + th := test.setup() + h, fh, err := th.Convert() + if test.expectedErr != nil { + require.ErrorIs(t, err, test.expectedErr) + return + } + require.Equal(t, test.expectedH, h) + if h != nil { + require.NoError(t, h.Validate()) + } + require.Equal(t, test.expectedFH, fh) + if fh != nil { + require.NoError(t, fh.Validate()) + } + }) + } +} diff --git a/util/logging/dedupe.go b/util/logging/dedupe.go index 37b345b39..d5aee5c09 100644 --- a/util/logging/dedupe.go +++ b/util/logging/dedupe.go @@ -51,11 +51,7 @@ func Dedupe(next *slog.Logger, repeat time.Duration) *Deduper { // provided context and log level, and returns false otherwise. It implements // slog.Handler. func (d *Deduper) Enabled(ctx context.Context, level slog.Level) bool { - d.mtx.RLock() - enabled := d.next.Enabled(ctx, level) - d.mtx.RUnlock() - - return enabled + return d.next.Enabled(ctx, level) } // Handle uses the provided context and slog.Record to deduplicate messages @@ -85,43 +81,27 @@ func (d *Deduper) Handle(ctx context.Context, r slog.Record) error { // WithAttrs adds the provided attributes to the Deduper's internal // slog.Logger. It implements slog.Handler. func (d *Deduper) WithAttrs(attrs []slog.Attr) slog.Handler { - d.mtx.Lock() - d.next = slog.New(d.next.Handler().WithAttrs(attrs)) - d.mtx.Unlock() - return d + return &Deduper{ + next: slog.New(d.next.Handler().WithAttrs(attrs)), + repeat: d.repeat, + quit: d.quit, + seen: d.seen, + } } // WithGroup adds the provided group name to the Deduper's internal // slog.Logger. It implements slog.Handler. func (d *Deduper) WithGroup(name string) slog.Handler { - d.mtx.Lock() - d.next = slog.New(d.next.Handler().WithGroup(name)) - d.mtx.Unlock() - return d -} + if name == "" { + return d + } -// Info logs the provided message and key-value arguments using the Deduper's -// internal slog.Logger. It is simply a wrapper around slog.Logger.Info(). -func (d *Deduper) Info(msg string, args ...any) { - d.next.Info(msg, args...) -} - -// Warn logs the provided message and key-value arguments using the Deduper's -// internal slog.Logger. It is simply a wrapper around slog.Logger.Warn(). -func (d *Deduper) Warn(msg string, args ...any) { - d.next.Warn(msg, args...) -} - -// Error logs the provided message and key-value arguments using the Deduper's -// internal slog.Logger. It is simply a wrapper around slog.Logger.Error(). -func (d *Deduper) Error(msg string, args ...any) { - d.next.Error(msg, args...) -} - -// Debug logs the provided message and key-value arguments using the Deduper's -// internal slog.Logger. It is simply a wrapper around slog.Logger.Debug(). -func (d *Deduper) Debug(msg string, args ...any) { - d.next.Debug(msg, args...) + return &Deduper{ + next: slog.New(d.next.Handler().WithGroup(name)), + repeat: d.repeat, + quit: d.quit, + seen: d.seen, + } } // Stop the Deduper. diff --git a/util/logging/file_test.go b/util/logging/file_test.go index 8ab475433..00752df8d 100644 --- a/util/logging/file_test.go +++ b/util/logging/file_test.go @@ -40,7 +40,7 @@ func TestJSONFileLogger_basic(t *testing.T) { _, err = f.Read(r) require.NoError(t, err) - result, err := regexp.Match(`^{"time":"[^"]+","level":"INFO","source":\{.+\},"msg":"test","hello":"world"}\n`, r) + result, err := regexp.Match(`^{"time":"[^"]+","level":"INFO","source":"file.go:\d+","msg":"test","hello":"world"}\n`, r) require.NoError(t, err) require.True(t, result, "unexpected content: %s", r) diff --git a/util/teststorage/storage.go b/util/teststorage/storage.go index 7d1f9dda2..e15d591e0 100644 --- a/util/teststorage/storage.go +++ b/util/teststorage/storage.go @@ -30,15 +30,15 @@ import ( // New returns a new TestStorage for testing purposes // that removes all associated files on closing. -func New(t testutil.T) *TestStorage { - stor, err := NewWithError() +func New(t testutil.T, outOfOrderTimeWindow ...int64) *TestStorage { + stor, err := NewWithError(outOfOrderTimeWindow...) require.NoError(t, err) return stor } // NewWithError returns a new TestStorage for user facing tests, which reports // errors directly. -func NewWithError() (*TestStorage, error) { +func NewWithError(outOfOrderTimeWindow ...int64) (*TestStorage, error) { dir, err := os.MkdirTemp("", "test_storage") if err != nil { return nil, fmt.Errorf("opening test directory: %w", err) @@ -51,6 +51,14 @@ func NewWithError() (*TestStorage, error) { opts.MaxBlockDuration = int64(24 * time.Hour / time.Millisecond) opts.RetentionDuration = 0 opts.EnableNativeHistograms = true + + // Set OutOfOrderTimeWindow if provided, otherwise use default (0) + if len(outOfOrderTimeWindow) > 0 { + opts.OutOfOrderTimeWindow = outOfOrderTimeWindow[0] + } else { + opts.OutOfOrderTimeWindow = 0 // Default value is zero + } + db, err := tsdb.Open(dir, nil, nil, opts, tsdb.NewDBStats()) if err != nil { return nil, fmt.Errorf("opening test storage: %w", err) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 9fb01f576..c4acafab6 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -15,6 +15,8 @@ package v1 import ( "context" + "crypto/sha1" + "encoding/hex" "encoding/json" "errors" "fmt" @@ -1371,7 +1373,8 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult { // RuleDiscovery has info for all rules. type RuleDiscovery struct { - RuleGroups []*RuleGroup `json:"groups"` + RuleGroups []*RuleGroup `json:"groups"` + GroupNextToken string `json:"groupNextToken:omitempty"` } // RuleGroup has info for rules which are part of a group. @@ -1458,8 +1461,23 @@ func (api *API) rules(r *http.Request) apiFuncResult { return invalidParamError(err, "exclude_alerts") } + maxGroups, nextToken, parseErr := parseListRulesPaginationRequest(r) + if parseErr != nil { + return *parseErr + } + rgs := make([]*RuleGroup, 0, len(ruleGroups)) + + foundToken := false + for _, grp := range ruleGroups { + if maxGroups > 0 && nextToken != "" && !foundToken { + if nextToken != getRuleGroupNextToken(grp.File(), grp.Name()) { + continue + } + foundToken = true + } + if len(rgSet) > 0 { if _, ok := rgSet[grp.Name()]; !ok { continue @@ -1504,6 +1522,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { if !excludeAlerts { activeAlerts = rulesAlertsToAPIAlerts(rule.ActiveAlerts()) } + enrichedRule = AlertingRule{ State: rule.State().String(), Name: rule.Name(), @@ -1519,6 +1538,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { LastEvaluation: rule.GetEvaluationTimestamp(), Type: "alerting", } + case *rules.RecordingRule: if !returnRecording { break @@ -1545,9 +1565,20 @@ func (api *API) rules(r *http.Request) apiFuncResult { // If the rule group response has no rules, skip it - this means we filtered all the rules of this group. if len(apiRuleGroup.Rules) > 0 { + if maxGroups > 0 && len(rgs) == int(maxGroups) { + // We've reached the capacity of our page plus one. That means that for sure there will be at least one + // rule group in a subsequent request. Therefore a next token is required. + res.GroupNextToken = getRuleGroupNextToken(grp.File(), grp.Name()) + break + } rgs = append(rgs, apiRuleGroup) } } + + if maxGroups > 0 && nextToken != "" && !foundToken { + return invalidParamError(fmt.Errorf("invalid group_next_token '%v'. were rule groups changed?", nextToken), "group_next_token") + } + res.RuleGroups = rgs return apiFuncResult{res, nil, nil, nil} } @@ -1566,6 +1597,44 @@ func parseExcludeAlerts(r *http.Request) (bool, error) { return excludeAlerts, nil } +func parseListRulesPaginationRequest(r *http.Request) (int64, string, *apiFuncResult) { + var ( + parsedMaxGroups int64 = -1 + err error + ) + maxGroups := r.URL.Query().Get("group_limit") + nextToken := r.URL.Query().Get("group_next_token") + + if nextToken != "" && maxGroups == "" { + errResult := invalidParamError(errors.New("group_limit needs to be present in order to paginate over the groups"), "group_next_token") + return -1, "", &errResult + } + + if maxGroups != "" { + parsedMaxGroups, err = strconv.ParseInt(maxGroups, 10, 32) + if err != nil { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be a valid number: %w", err), "group_limit") + return -1, "", &errResult + } + if parsedMaxGroups <= 0 { + errResult := invalidParamError(errors.New("group_limit needs to be greater than 0"), "group_limit") + return -1, "", &errResult + } + } + + if parsedMaxGroups > 0 { + return parsedMaxGroups, nextToken, nil + } + + return -1, "", nil +} + +func getRuleGroupNextToken(file, group string) string { + h := sha1.New() + h.Write([]byte(file + ";" + group)) + return hex.EncodeToString(h.Sum(nil)) +} + type prometheusConfig struct { YAML string `json:"yaml"` } diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 7ac2fe569..f5c81ebfa 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -338,7 +338,15 @@ func (m *rulesRetrieverMock) CreateRuleGroups() { ShouldRestore: false, Opts: opts, }) - m.ruleGroups = []*rules.Group{group} + group2 := rules.NewGroup(rules.GroupOptions{ + Name: "grp2", + File: "/path/to/file", + Interval: time.Second, + Rules: []rules.Rule{r[0]}, + ShouldRestore: false, + Opts: opts, + }) + m.ruleGroups = []*rules.Group{group, group2} } func (m *rulesRetrieverMock) AlertingRules() []*rules.AlertingRule { @@ -607,7 +615,7 @@ func TestGetSeries(t *testing.T) { matchers: []string{`{foo="boo"}`, `{foo="baz"}`}, expectedErrorType: errorExec, api: &API{ - Queryable: errorTestQueryable{err: fmt.Errorf("generic")}, + Queryable: errorTestQueryable{err: errors.New("generic")}, }, }, { @@ -615,7 +623,7 @@ func TestGetSeries(t *testing.T) { matchers: []string{`{foo="boo"}`, `{foo="baz"}`}, expectedErrorType: errorInternal, api: &API{ - Queryable: errorTestQueryable{err: promql.ErrStorage{Err: fmt.Errorf("generic")}}, + Queryable: errorTestQueryable{err: promql.ErrStorage{Err: errors.New("generic")}}, }, }, } { @@ -709,7 +717,7 @@ func TestQueryExemplars(t *testing.T) { name: "should return errorExec upon genetic error", expectedErrorType: errorExec, api: &API{ - ExemplarQueryable: errorTestQueryable{err: fmt.Errorf("generic")}, + ExemplarQueryable: errorTestQueryable{err: errors.New("generic")}, }, query: url.Values{ "query": []string{`test_metric3{foo="boo"} - test_metric4{foo="bar"}`}, @@ -721,7 +729,7 @@ func TestQueryExemplars(t *testing.T) { name: "should return errorInternal err type is ErrStorage", expectedErrorType: errorInternal, api: &API{ - ExemplarQueryable: errorTestQueryable{err: promql.ErrStorage{Err: fmt.Errorf("generic")}}, + ExemplarQueryable: errorTestQueryable{err: promql.ErrStorage{Err: errors.New("generic")}}, }, query: url.Values{ "query": []string{`test_metric3{foo="boo"} - test_metric4{foo="bar"}`}, @@ -830,7 +838,7 @@ func TestLabelNames(t *testing.T) { matchers: []string{`{foo="boo"}`, `{foo="baz"}`}, expectedErrorType: errorExec, api: &API{ - Queryable: errorTestQueryable{err: fmt.Errorf("generic")}, + Queryable: errorTestQueryable{err: errors.New("generic")}, }, }, { @@ -838,7 +846,7 @@ func TestLabelNames(t *testing.T) { matchers: []string{`{foo="boo"}`, `{foo="baz"}`}, expectedErrorType: errorInternal, api: &API{ - Queryable: errorTestQueryable{err: promql.ErrStorage{Err: fmt.Errorf("generic")}}, + Queryable: errorTestQueryable{err: promql.ErrStorage{Err: errors.New("generic")}}, }, }, } { @@ -2241,6 +2249,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2329,6 +2356,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: nil, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2410,6 +2456,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2681,6 +2746,159 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, zeroFunc: rulesZeroFunc, }, + { + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + }, + response: &RuleDiscovery{ + GroupNextToken: getRuleGroupNextToken("/path/to/file", "grp2"), + RuleGroups: []*RuleGroup{ + { + Name: "grp", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric4", + Query: "up == 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "pending", + Name: "test_metric5", + Query: "vector(1)", + Duration: 1, + Labels: labels.FromStrings("name", "tm5"), + Annotations: labels.Labels{}, + Alerts: []*Alert{ + { + Labels: labels.FromStrings("alertname", "test_metric5", "name", "tm5"), + Annotations: labels.Labels{}, + State: "pending", + Value: "1e+00", + }, + }, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric6", + Query: "up == 1", + Duration: 1, + Labels: labels.FromStrings("testlabel", "rule"), + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric7", + Query: "up == 1", + Duration: 1, + Labels: labels.FromStrings("templatedlabel", "{{ $externalURL }}"), + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + RecordingRule{ + Name: "recording-rule-1", + Query: "vector(1)", + Labels: labels.Labels{}, + Health: "ok", + Type: "recording", + }, + RecordingRule{ + Name: "recording-rule-2", + Query: "vector(1)", + Labels: labels.FromStrings("testlabel", "rule"), + Health: "ok", + Type: "recording", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, + }, + { + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + response: &RuleDiscovery{ + RuleGroups: []*RuleGroup{ + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, + }, + { // invalid pagination request + endpoint: api.rules, + query: url.Values{ + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // invalid group_limit + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"0"}, + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // Pagination token is invalid due to changes in the rule groups + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + "group_next_token": []string{getRuleGroupNextToken("/removed/file", "notfound")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, { endpoint: api.queryExemplars, query: url.Values{ diff --git a/web/ui/mantine-ui/src/components/LabelBadges.tsx b/web/ui/mantine-ui/src/components/LabelBadges.tsx index f60a37f03..8aa713556 100644 --- a/web/ui/mantine-ui/src/components/LabelBadges.tsx +++ b/web/ui/mantine-ui/src/components/LabelBadges.tsx @@ -2,6 +2,7 @@ import { Badge, BadgeVariant, Group, MantineColor, Stack } from "@mantine/core"; import { FC } from "react"; import { escapeString } from "../lib/escapeString"; import badgeClasses from "../Badge.module.css"; +import { maybeQuoteLabelName } from "../promql/utils"; export interface LabelBadgesProps { labels: Record; @@ -30,7 +31,7 @@ export const LabelBadges: FC = ({ }} key={k} > - {k}="{escapeString(v)}" + {maybeQuoteLabelName(k)}="{escapeString(v)}" ); })} diff --git a/web/ui/mantine-ui/src/lib/formatSeries.ts b/web/ui/mantine-ui/src/lib/formatSeries.ts index b79c40076..007659070 100644 --- a/web/ui/mantine-ui/src/lib/formatSeries.ts +++ b/web/ui/mantine-ui/src/lib/formatSeries.ts @@ -1,12 +1,24 @@ +import { + maybeQuoteLabelName, + metricContainsExtendedCharset, +} from "../promql/utils"; import { escapeString } from "./escapeString"; +// TODO: Maybe replace this with the new PromLens-derived serialization code in src/promql/serialize.ts? export const formatSeries = (labels: { [key: string]: string }): string => { if (labels === null) { return "scalar"; } + if (metricContainsExtendedCharset(labels.__name__ || "")) { + return `{"${escapeString(labels.__name__)}",${Object.entries(labels) + .filter(([k]) => k !== "__name__") + .map(([k, v]) => `${maybeQuoteLabelName(k)}="${escapeString(v)}"`) + .join(", ")}}`; + } + return `${labels.__name__ || ""}{${Object.entries(labels) .filter(([k]) => k !== "__name__") - .map(([k, v]) => `${k}="${escapeString(v)}"`) + .map(([k, v]) => `${maybeQuoteLabelName(k)}="${escapeString(v)}"`) .join(", ")}}`; }; diff --git a/web/ui/mantine-ui/src/pages/query/SeriesName.tsx b/web/ui/mantine-ui/src/pages/query/SeriesName.tsx index 66a7856f5..d03b530f0 100644 --- a/web/ui/mantine-ui/src/pages/query/SeriesName.tsx +++ b/web/ui/mantine-ui/src/pages/query/SeriesName.tsx @@ -5,6 +5,10 @@ import classes from "./SeriesName.module.css"; import { escapeString } from "../../lib/escapeString"; import { useClipboard } from "@mantine/hooks"; import { notifications } from "@mantine/notifications"; +import { + maybeQuoteLabelName, + metricContainsExtendedCharset, +} from "../../promql/utils"; interface SeriesNameProps { labels: { [key: string]: string } | null; @@ -15,8 +19,26 @@ const SeriesName: FC = ({ labels, format }) => { const clipboard = useClipboard(); const renderFormatted = (): React.ReactElement => { + const metricExtendedCharset = + labels && metricContainsExtendedCharset(labels.__name__ || ""); + const labelNodes: React.ReactElement[] = []; let first = true; + + // If the metric name uses the extended new charset, we need to escape it, + // put it into the label matcher list, and make sure it's the first item. + if (metricExtendedCharset) { + labelNodes.push( + + + "{escapeString(labels.__name__)}" + + + ); + + first = false; + } + for (const label in labels) { if (label === "__name__") { continue; @@ -37,7 +59,10 @@ const SeriesName: FC = ({ labels, format }) => { }} title="Click to copy label matcher" > - {label}= + + {maybeQuoteLabelName(label)} + + = "{escapeString(labels[label])}" @@ -52,9 +77,11 @@ const SeriesName: FC = ({ labels, format }) => { return ( - - {labels ? labels.__name__ : ""} - + {!metricExtendedCharset && ( + + {labels ? labels.__name__ : ""} + + )} {"{"} {labelNodes} {"}"} diff --git a/web/ui/mantine-ui/src/promql/format.tsx b/web/ui/mantine-ui/src/promql/format.tsx index 05dd7d410..399644408 100644 --- a/web/ui/mantine-ui/src/promql/format.tsx +++ b/web/ui/mantine-ui/src/promql/format.tsx @@ -8,14 +8,21 @@ import ASTNode, { MatrixSelector, } from "./ast"; import { formatPrometheusDuration } from "../lib/formatTime"; -import { maybeParenthesizeBinopChild, escapeString } from "./utils"; +import { + maybeParenthesizeBinopChild, + escapeString, + maybeQuoteLabelName, + metricContainsExtendedCharset, +} from "./utils"; export const labelNameList = (labels: string[]): React.ReactNode[] => { return labels.map((l, i) => { return ( {i !== 0 && ", "} - {l} + + {maybeQuoteLabelName(l)} + ); }); @@ -69,27 +76,45 @@ const formatAtAndOffset = ( const formatSelector = ( node: VectorSelector | MatrixSelector ): ReactElement => { - const matchLabels = node.matchers - .filter( - (m) => - !( - m.name === "__name__" && - m.type === matchType.equal && - m.value === node.name - ) - ) - .map((m, i) => ( - - {i !== 0 && ","} - {m.name} - {m.type} - "{escapeString(m.value)}" + const matchLabels: JSX.Element[] = []; + + // If the metric name contains the new extended charset, we need to escape it + // and add it at the beginning of the matchers list in the curly braces. + const metricName = + node.name || + node.matchers.find( + (m) => m.name === "__name__" && m.type === matchType.equal + )?.value || + ""; + const metricExtendedCharset = metricContainsExtendedCharset(metricName); + if (metricExtendedCharset) { + matchLabels.push( + + "{escapeString(metricName)}" - )); + ); + } + + matchLabels.push( + ...node.matchers + .filter((m) => !(m.name === "__name__" && m.type === matchType.equal)) + .map((m, i) => ( + + {(i !== 0 || metricExtendedCharset) && ","} + + {maybeQuoteLabelName(m.name)} + + {m.type} + "{escapeString(m.value)}" + + )) + ); return ( <> - {node.name} + {!metricExtendedCharset && ( + {metricName} + )} {matchLabels.length > 0 && ( <> {"{"} diff --git a/web/ui/mantine-ui/src/promql/serialize.ts b/web/ui/mantine-ui/src/promql/serialize.ts index af9c6ef15..1d2c63f4f 100644 --- a/web/ui/mantine-ui/src/promql/serialize.ts +++ b/web/ui/mantine-ui/src/promql/serialize.ts @@ -11,8 +11,14 @@ import { aggregatorsWithParam, maybeParenthesizeBinopChild, escapeString, + metricContainsExtendedCharset, + maybeQuoteLabelName, } from "./utils"; +const labelNameList = (labels: string[]): string => { + return labels.map((ln) => maybeQuoteLabelName(ln)).join(", "); +}; + const serializeAtAndOffset = ( timestamp: number | null, startOrEnd: StartOrEnd, @@ -28,15 +34,23 @@ const serializeAtAndOffset = ( const serializeSelector = (node: VectorSelector | MatrixSelector): string => { const matchers = node.matchers - .filter( - (m) => - !( - m.name === "__name__" && - m.type === matchType.equal && - m.value === node.name - ) - ) - .map((m) => `${m.name}${m.type}"${escapeString(m.value)}"`); + .filter((m) => !(m.name === "__name__" && m.type === matchType.equal)) + .map( + (m) => `${maybeQuoteLabelName(m.name)}${m.type}"${escapeString(m.value)}"` + ); + + // If the metric name contains the new extended charset, we need to escape it + // and add it at the beginning of the matchers list in the curly braces. + const metricName = + node.name || + node.matchers.find( + (m) => m.name === "__name__" && m.type === matchType.equal + )?.value || + ""; + const metricExtendedCharset = metricContainsExtendedCharset(metricName); + if (metricExtendedCharset) { + matchers.unshift(`"${escapeString(metricName)}"`); + } const range = node.type === nodeType.matrixSelector @@ -48,7 +62,7 @@ const serializeSelector = (node: VectorSelector | MatrixSelector): string => { node.offset ); - return `${node.name}${matchers.length > 0 ? `{${matchers.join(",")}}` : ""}${range}${atAndOffset}`; + return `${!metricExtendedCharset ? metricName : ""}${matchers.length > 0 ? `{${matchers.join(",")}}` : ""}${range}${atAndOffset}`; }; const serializeNode = ( @@ -68,9 +82,9 @@ const serializeNode = ( case nodeType.aggregation: return `${initialInd}${node.op}${ node.without - ? ` without(${node.grouping.join(", ")}) ` + ? ` without(${labelNameList(node.grouping)}) ` : node.grouping.length > 0 - ? ` by(${node.grouping.join(", ")}) ` + ? ` by(${labelNameList(node.grouping)}) ` : "" }(${childListSeparator}${ aggregatorsWithParam.includes(node.op) && node.param !== null @@ -119,16 +133,16 @@ const serializeNode = ( const vm = node.matching; if (vm !== null && (vm.labels.length > 0 || vm.on)) { if (vm.on) { - matching = ` on(${vm.labels.join(", ")})`; + matching = ` on(${labelNameList(vm.labels)})`; } else { - matching = ` ignoring(${vm.labels.join(", ")})`; + matching = ` ignoring(${labelNameList(vm.labels)})`; } if ( vm.card === vectorMatchCardinality.manyToOne || vm.card === vectorMatchCardinality.oneToMany ) { - grouping = ` group_${vm.card === vectorMatchCardinality.manyToOne ? "left" : "right"}(${vm.include.join(",")})`; + grouping = ` group_${vm.card === vectorMatchCardinality.manyToOne ? "left" : "right"}(${labelNameList(vm.include)})`; } } diff --git a/web/ui/mantine-ui/src/promql/serializeAndFormat.test.ts b/web/ui/mantine-ui/src/promql/serializeAndFormat.test.ts index ea045612c..da4be7ced 100644 --- a/web/ui/mantine-ui/src/promql/serializeAndFormat.test.ts +++ b/web/ui/mantine-ui/src/promql/serializeAndFormat.test.ts @@ -99,7 +99,7 @@ describe("serializeNode and formatNode", () => { timestamp: null, startOrEnd: null, }, - output: '{__name__="metric_name"} offset 1m', + output: "metric_name offset 1m", }, { // Escaping in label values. @@ -157,6 +157,20 @@ describe("serializeNode and formatNode", () => { }, output: "metric_name[5m] @ start() offset -10m", }, + { + node: { + type: nodeType.vectorSelector, + name: "", // Test formatting a selector with an empty metric name. + matchers: [ + { type: matchType.equal, name: "label1", value: "value1" }, + ], + offset: 0, + timestamp: null, + startOrEnd: null, + }, + output: + '{label1="value1"}', + }, // Aggregations. { @@ -642,6 +656,113 @@ describe("serializeNode and formatNode", () => { == bool on(label1, label2) group_right(label3) …`, }, + // Test new Prometheus 3.0 UTF-8 support. + { + node: { + bool: false, + lhs: { + bool: false, + lhs: { + expr: { + matchers: [ + { + name: "__name__", + type: matchType.equal, + value: "metric_ä", + }, + { + name: "foo", + type: matchType.equal, + value: "bar", + }, + ], + name: "", + offset: 0, + startOrEnd: null, + timestamp: null, + type: nodeType.vectorSelector, + }, + grouping: ["a", "ä"], + op: aggregationType.sum, + param: null, + type: nodeType.aggregation, + without: false, + }, + matching: { + card: vectorMatchCardinality.manyToOne, + include: ["c", "ü"], + labels: ["b", "ö"], + on: true, + }, + op: binaryOperatorType.div, + rhs: { + expr: { + matchers: [ + { + name: "__name__", + type: matchType.equal, + value: "metric_ö", + }, + { + name: "bar", + type: matchType.equal, + value: "foo", + }, + ], + name: "", + offset: 0, + startOrEnd: null, + timestamp: null, + type: nodeType.vectorSelector, + }, + grouping: ["d", "ä"], + op: aggregationType.sum, + param: null, + type: nodeType.aggregation, + without: true, + }, + type: nodeType.binaryExpr, + }, + matching: { + card: vectorMatchCardinality.oneToOne, + include: [], + labels: ["e", "ö"], + on: false, + }, + op: binaryOperatorType.add, + rhs: { + expr: { + matchers: [ + { + name: "__name__", + type: matchType.equal, + value: "metric_ü", + }, + ], + name: "", + offset: 0, + startOrEnd: null, + timestamp: null, + type: nodeType.vectorSelector, + }, + type: nodeType.parenExpr, + }, + type: nodeType.binaryExpr, + }, + output: + 'sum by(a, "ä") ({"metric_ä",foo="bar"}) / on(b, "ö") group_left(c, "ü") sum without(d, "ä") ({"metric_ö",bar="foo"}) + ignoring(e, "ö") ({"metric_ü"})', + prettyOutput: ` sum by(a, "ä") ( + {"metric_ä",foo="bar"} + ) + / on(b, "ö") group_left(c, "ü") + sum without(d, "ä") ( + {"metric_ö",bar="foo"} + ) ++ ignoring(e, "ö") + ( + {"metric_ü"} + )`, + }, ]; tests.forEach((t) => { diff --git a/web/ui/mantine-ui/src/promql/utils.ts b/web/ui/mantine-ui/src/promql/utils.ts index 5477ee596..2addeed8a 100644 --- a/web/ui/mantine-ui/src/promql/utils.ts +++ b/web/ui/mantine-ui/src/promql/utils.ts @@ -267,6 +267,21 @@ export const humanizedValueType: Record = { [valueType.matrix]: "range vector", }; +const metricNameRe = /^[a-zA-Z_:][a-zA-Z0-9_:]*$/; +const labelNameCharsetRe = /^[a-zA-Z_][a-zA-Z0-9_]*$/; + +export const metricContainsExtendedCharset = (str: string) => { + return str !== "" && !metricNameRe.test(str); +}; + +export const labelNameContainsExtendedCharset = (str: string) => { + return !labelNameCharsetRe.test(str); +}; + export const escapeString = (str: string) => { return str.replace(/([\\"])/g, "\\$1"); }; + +export const maybeQuoteLabelName = (str: string) => { + return labelNameContainsExtendedCharset(str) ? `"${escapeString(str)}"` : str; +}; diff --git a/web/ui/module/codemirror-promql/src/complete/promql.terms.ts b/web/ui/module/codemirror-promql/src/complete/promql.terms.ts index 20cf14d4b..e2a1441fb 100644 --- a/web/ui/module/codemirror-promql/src/complete/promql.terms.ts +++ b/web/ui/module/codemirror-promql/src/complete/promql.terms.ts @@ -281,6 +281,12 @@ export const functionIdentifierTerms = [ info: 'Calculate the increase in value over a range of time (for counters)', type: 'function', }, + { + label: 'info', + detail: 'function', + info: 'Add data labels from corresponding info metrics', + type: 'function', + }, { label: 'irate', detail: 'function', diff --git a/web/ui/module/codemirror-promql/src/parser/parser.test.ts b/web/ui/module/codemirror-promql/src/parser/parser.test.ts index fc36fef03..00d452809 100644 --- a/web/ui/module/codemirror-promql/src/parser/parser.test.ts +++ b/web/ui/module/codemirror-promql/src/parser/parser.test.ts @@ -968,6 +968,23 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [], }, + { + expr: 'info(rate(http_request_counter_total{}[5m]))', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: 'info(rate(http_request_counter_total[5m]), target_info{service_version=~".+"})', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + to: 78, + message: `expected label selectors as the second argument to "info" function, got [object Object]`, + severity: 'error', + }, + ], + }, ]; testCases.forEach((value) => { const state = createEditorState(value.expr); diff --git a/web/ui/module/codemirror-promql/src/parser/parser.ts b/web/ui/module/codemirror-promql/src/parser/parser.ts index 351183d6b..9b4b917bf 100644 --- a/web/ui/module/codemirror-promql/src/parser/parser.ts +++ b/web/ui/module/codemirror-promql/src/parser/parser.ts @@ -275,6 +275,13 @@ export class Parser { } } + if (funcSignature.name === 'info') { + // Verify that the data label selector expression is not prefixed with metric name. + if (args.length > 1 && args[1].getChild(Identifier)) { + this.addDiagnostic(node, `expected label selectors as the second argument to "info" function, got ${args[1].type}`); + } + } + let j = 0; for (let i = 0; i < args.length; i++) { j = i; diff --git a/web/ui/module/codemirror-promql/src/types/function.ts b/web/ui/module/codemirror-promql/src/types/function.ts index 4048e2db0..56d590412 100644 --- a/web/ui/module/codemirror-promql/src/types/function.ts +++ b/web/ui/module/codemirror-promql/src/types/function.ts @@ -50,6 +50,7 @@ import { Hour, Idelta, Increase, + Info, Irate, LabelJoin, LabelReplace, @@ -336,6 +337,12 @@ const promqlFunctions: { [key: number]: PromQLFunction } = { variadic: 0, returnType: ValueType.vector, }, + [Info]: { + name: 'info', + argTypes: [ValueType.vector, ValueType.vector], + variadic: 1, + returnType: ValueType.vector, + }, [Irate]: { name: 'irate', argTypes: [ValueType.matrix], diff --git a/web/ui/module/lezer-promql/package.json b/web/ui/module/lezer-promql/package.json index 3eadc3a53..0883552c8 100644 --- a/web/ui/module/lezer-promql/package.json +++ b/web/ui/module/lezer-promql/package.json @@ -5,6 +5,7 @@ "main": "dist/index.cjs", "type": "module", "exports": { + "types": "./dist/index.d.ts", "import": "./dist/index.es.js", "require": "./dist/index.cjs" }, diff --git a/web/ui/module/lezer-promql/src/promql.grammar b/web/ui/module/lezer-promql/src/promql.grammar index 977cce44d..f8850fc2b 100644 --- a/web/ui/module/lezer-promql/src/promql.grammar +++ b/web/ui/module/lezer-promql/src/promql.grammar @@ -145,6 +145,7 @@ FunctionIdentifier { Hour | Idelta | Increase | + Info | Irate | LabelReplace | LabelJoin | @@ -392,6 +393,7 @@ NumberDurationLiteralInDurationContext { Hour { condFn<"hour"> } Idelta { condFn<"idelta"> } Increase { condFn<"increase"> } + Info { condFn<"info"> } Irate { condFn<"irate"> } LabelReplace { condFn<"label_replace"> } LabelJoin { condFn<"label_join"> } diff --git a/web/ui/react-app/package-lock.json b/web/ui/react-app/package-lock.json index 667eb0b37..d3de03e5e 100644 --- a/web/ui/react-app/package-lock.json +++ b/web/ui/react-app/package-lock.json @@ -1,12 +1,12 @@ { "name": "@prometheus-io/app", - "version": "0.55.0-rc.0", + "version": "0.55.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@prometheus-io/app", - "version": "0.55.0-rc.0", + "version": "0.55.0", "dependencies": { "@codemirror/autocomplete": "^6.17.0", "@codemirror/commands": "^6.6.0", @@ -24,7 +24,7 @@ "@lezer/lr": "^1.4.2", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.55.0-rc.0", + "@prometheus-io/codemirror-promql": "0.55.0", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6", @@ -4341,12 +4341,11 @@ } }, "node_modules/@prometheus-io/codemirror-promql": { - "version": "0.55.0-rc.0", - "resolved": "https://registry.npmjs.org/@prometheus-io/codemirror-promql/-/codemirror-promql-0.55.0-rc.0.tgz", - "integrity": "sha512-BlDKH2eB8Sd9bQmQjvJvncvZ+VTtrtReSO6qWZXULyrXp+FEjONybOH3Ejq/0a2hat0GpZzcEfwKqPbdy4WdCQ==", - "license": "Apache-2.0", + "version": "0.55.0", + "resolved": "https://registry.npmjs.org/@prometheus-io/codemirror-promql/-/codemirror-promql-0.55.0.tgz", + "integrity": "sha512-W+aBBToIvxHbcDsQYJSpgaMtcLUCy3SMIK6jluaEgJrkpOfEJnItZu/rvCC/ehCz2c+h+6WkPJklH8WogsXyEg==", "dependencies": { - "@prometheus-io/lezer-promql": "0.55.0-rc.0", + "@prometheus-io/lezer-promql": "0.55.0", "lru-cache": "^7.18.3" }, "engines": { @@ -4362,10 +4361,9 @@ } }, "node_modules/@prometheus-io/lezer-promql": { - "version": "0.55.0-rc.0", - "resolved": "https://registry.npmjs.org/@prometheus-io/lezer-promql/-/lezer-promql-0.55.0-rc.0.tgz", - "integrity": "sha512-Ikaabw8gfu0HI2D2rKykLBWio+ytTEE03bdZDMpILYULoeGVPdKgbeGLLI9Kafyv48Qiis55o60EfDoywiRHqA==", - "license": "Apache-2.0", + "version": "0.55.0", + "resolved": "https://registry.npmjs.org/@prometheus-io/lezer-promql/-/lezer-promql-0.55.0.tgz", + "integrity": "sha512-DHg6l6pfDnE8eLsj4DyXhFDe7OsqSBw2EnSVG4biddzLsIF5gXKazIswYTGHJ26CGHHiDPcbXjhlm9dEWI2aJA==", "peerDependencies": { "@lezer/highlight": "^1.1.2", "@lezer/lr": "^1.2.3" diff --git a/web/ui/react-app/package.json b/web/ui/react-app/package.json index c3236caa4..d75aba5c4 100644 --- a/web/ui/react-app/package.json +++ b/web/ui/react-app/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/app", - "version": "0.55.0-rc.0", + "version": "0.55.0", "private": true, "dependencies": { "@codemirror/autocomplete": "^6.17.0", @@ -19,7 +19,7 @@ "@lezer/lr": "^1.4.2", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.55.0-rc.0", + "@prometheus-io/codemirror-promql": "0.55.0", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6", diff --git a/web/ui/react-app/src/pages/alerts/AlertContents.tsx b/web/ui/react-app/src/pages/alerts/AlertContents.tsx index a619f69fc..c5aed9183 100644 --- a/web/ui/react-app/src/pages/alerts/AlertContents.tsx +++ b/web/ui/react-app/src/pages/alerts/AlertContents.tsx @@ -37,6 +37,7 @@ interface RuleGroup { file: string; rules: Rule[]; interval: number; + labels: Record; } const kvSearchRule = new KVSearch({ @@ -93,6 +94,7 @@ const AlertsContent: FC = ({ groups = [], statsCount }) => { name: group.name, interval: group.interval, rules: ruleFilterList.map((value) => value.original), + labels: group.labels, }); } } @@ -114,6 +116,7 @@ const AlertsContent: FC = ({ groups = [], statsCount }) => { name: group.name, interval: group.interval, rules: group.rules.filter((value) => filter[value.state]), + labels: group.labels, }; if (newGroup.rules.length > 0) { result.push(newGroup); diff --git a/web/ui/react-app/src/pages/rules/RulesContent.tsx b/web/ui/react-app/src/pages/rules/RulesContent.tsx index 9bb866d47..cd42a337f 100644 --- a/web/ui/react-app/src/pages/rules/RulesContent.tsx +++ b/web/ui/react-app/src/pages/rules/RulesContent.tsx @@ -17,6 +17,7 @@ interface RuleGroup { rules: Rule[]; evaluationTime: string; lastEvaluation: string; + labels: Record; } export interface RulesMap { @@ -105,10 +106,10 @@ export const RulesContent: FC = ({ response }) => { keep_firing_for: {formatDuration(r.keepFiringFor * 1000)} )} - {r.labels && Object.keys(r.labels).length > 0 && ( + {Object.keys(Object.assign({ ...g.labels }, { ...r.labels })).length > 0 && (
labels: - {Object.entries(r.labels).map(([key, value]) => ( + {Object.entries(Object.assign({ ...g.labels }, { ...r.labels })).map(([key, value]) => (
{key}: {value}