diff --git a/.github/workflows/buf-lint.yml b/.github/workflows/buf-lint.yml index 806a706e1..fe8c4704b 100644 --- a/.github/workflows/buf-lint.yml +++ b/.github/workflows/buf-lint.yml @@ -12,7 +12,7 @@ jobs: name: lint runs-on: ubuntu-latest steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: bufbuild/buf-setup-action@517ee23296d5caf38df31c21945e6a54bbc8a89f # v1.30.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml index 0fbd01f53..2156e8f19 100644 --- a/.github/workflows/buf.yml +++ b/.github/workflows/buf.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest if: github.repository_owner == 'prometheus' steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: bufbuild/buf-setup-action@517ee23296d5caf38df31c21945e6a54bbc8a89f # v1.30.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 714faf167..a56140c19 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: # should also be updated. image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/setup_environment - run: make GOOPTS=--tags=stringlabels GO_ONLY=1 SKIP_GOLANGCI_LINT=1 @@ -27,7 +27,7 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/setup_environment - run: go test --tags=dedupelabels ./... @@ -43,7 +43,7 @@ jobs: # The go version in this image should be N-1 wrt test_go. image: quay.io/prometheus/golang-builder:1.21-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - run: make build # Don't run NPM build; don't run race-detector. - run: make test GO_ONLY=1 test-flags="" @@ -57,7 +57,7 @@ jobs: image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/setup_environment with: @@ -74,7 +74,7 @@ jobs: name: Go tests on Windows runs-on: windows-latest steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: go-version: 1.22.x @@ -91,7 +91,7 @@ jobs: container: image: quay.io/prometheus/golang-builder:1.22-base steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - run: go install ./cmd/promtool/. 
- run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest - run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest @@ -114,7 +114,7 @@ jobs: matrix: thread: [ 0, 1, 2 ] steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/build with: @@ -137,32 +137,44 @@ jobs: # Whenever the Go version is updated here, .promu.yml # should also be updated. steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/build with: parallelism: 12 thread: ${{ matrix.thread }} - golangci: - name: golangci-lint + check_generated_parser: + name: Check generated parser runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Install Go uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: cache: false go-version: 1.22.x + - name: Run goyacc and check for diff + run: make install-goyacc check-generated-parser + golangci: + name: golangci-lint + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + - name: Install Go + uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 + with: + go-version: 1.22.x - name: Install snmp_exporter/generator dependencies run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' - name: Lint - uses: golangci/golangci-lint-action@3cfe3a4abbb849e10058ce4af15d205b6da42804 # v4.0.0 + uses: golangci/golangci-lint-action@a4f60bb28d35aeee14e6880718e0c85ff1882e64 # v6.0.1 with: args: --verbose # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml. 
- version: v1.56.2 + version: v1.59.0 fuzzing: uses: ./.github/workflows/fuzzing.yml if: github.event_name == 'pull_request' @@ -175,7 +187,7 @@ jobs: needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all] if: github.event_name == 'push' && github.event.ref == 'refs/heads/main' steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/publish_main with: @@ -189,7 +201,7 @@ jobs: needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all] if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.') steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - uses: ./.github/promci/actions/publish_release with: @@ -204,7 +216,7 @@ jobs: needs: [test_ui, codeql] steps: - name: Checkout - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0 - name: Install nodejs uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2 diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 836fb2568..561c22eab 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -24,7 +24,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Initialize CodeQL uses: github/codeql-action/init@012739e5082ff0c22ca6d6ab32e07c36df03c4a4 # v3.22.12 diff --git a/.github/workflows/container_description.yml b/.github/workflows/container_description.yml index d0368eaa1..a7d7e150c 100644 --- a/.github/workflows/container_description.yml +++ b/.github/workflows/container_description.yml @@ -17,7 +17,7 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Set docker hub repo name run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV - name: Push README to Dockerhub @@ -37,7 +37,7 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. 
steps: - name: git checkout - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - name: Set quay.io org name run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV - name: Set quay.io repo name diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 4c19563eb..dc510e596 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -21,7 +21,7 @@ jobs: fuzz-seconds: 600 dry-run: false - name: Upload Crash - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff --git a/.github/workflows/repo_sync.yml b/.github/workflows/repo_sync.yml index 3458d7b11..f1c7ca5d0 100644 --- a/.github/workflows/repo_sync.yml +++ b/.github/workflows/repo_sync.yml @@ -13,7 +13,7 @@ jobs: container: image: quay.io/prometheus/golang-builder steps: - - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 - run: ./scripts/sync_repo_files.sh env: GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }} diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 51ff643ab..0be780f30 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -21,7 +21,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # tag=v4.1.2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # tag=v4.1.4 with: persist-credentials: false @@ -37,7 +37,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # tag=v4.3.1 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # tag=v4.3.3 with: name: SARIF file path: results.sarif diff --git a/.gitpod.Dockerfile b/.gitpod.Dockerfile index d645db5de..2370ec5f5 100644 --- a/.gitpod.Dockerfile +++ b/.gitpod.Dockerfile @@ -1,15 +1,33 @@ FROM gitpod/workspace-full +# Set Node.js version as an environment variable. ENV CUSTOM_NODE_VERSION=16 -ENV CUSTOM_GO_VERSION=1.19 -ENV GOPATH=$HOME/go-packages -ENV GOROOT=$HOME/go -ENV PATH=$GOROOT/bin:$GOPATH/bin:$PATH +# Install and use the specified Node.js version via nvm. RUN bash -c ". .nvm/nvm.sh && nvm install ${CUSTOM_NODE_VERSION} && nvm use ${CUSTOM_NODE_VERSION} && nvm alias default ${CUSTOM_NODE_VERSION}" +# Ensure nvm uses the default Node.js version in all new shells. RUN echo "nvm use default &>/dev/null" >> ~/.bashrc.d/51-nvm-fix -RUN curl -fsSL https://dl.google.com/go/go${GO_VERSION}.linux-amd64.tar.gz | tar xzs \ - && printf '%s\n' 'export GOPATH=/workspace/go' \ - 'export PATH=$GOPATH/bin:$PATH' > $HOME/.bashrc.d/300-go +# Remove any existing Go installation in $HOME path. +RUN rm -rf $HOME/go $HOME/go-packages + +# Export go environment variables. 
+RUN echo "export GOPATH=/workspace/go" >> ~/.bashrc.d/300-go && \ + echo "export GOBIN=\$GOPATH/bin" >> ~/.bashrc.d/300-go && \ + echo "export GOROOT=${HOME}/go" >> ~/.bashrc.d/300-go && \ + echo "export PATH=\$GOROOT/bin:\$GOBIN:\$PATH" >> ~/.bashrc + +# Reload the environment variables to ensure go environment variables are +# available in subsequent commands. +RUN bash -c "source ~/.bashrc && source ~/.bashrc.d/300-go" + +# Fetch the Go version dynamically from the Prometheus go.mod file and Install Go in $HOME path. +RUN export CUSTOM_GO_VERSION=$(curl -sSL "https://raw.githubusercontent.com/prometheus/prometheus/main/go.mod" | awk '/^go/{print $2".0"}') && \ + curl -fsSL "https://dl.google.com/go/go${CUSTOM_GO_VERSION}.linux-amd64.tar.gz" | \ + tar -xz -C $HOME + +# Fetch the goyacc parser version dynamically from the Prometheus Makefile +# and install it globally in $GOBIN path. +RUN GOYACC_VERSION=$(curl -fsSL "https://raw.githubusercontent.com/prometheus/prometheus/main/Makefile" | awk -F'=' '/GOYACC_VERSION \?=/{gsub(/ /, "", $2); print $2}') && \ + go install "golang.org/x/tools/cmd/goyacc@${GOYACC_VERSION}" diff --git a/.golangci.yml b/.golangci.yml index a85a76cdf..f81b29ed2 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -21,6 +21,7 @@ linters: - goimports - misspell - nolintlint + - perfsprint - predeclared - revive - testifylint @@ -44,7 +45,9 @@ issues: - linters: - godot source: "^// ===" - + - linters: + - perfsprint + text: "fmt.Sprintf can be replaced with string concatenation" linters-settings: depguard: rules: @@ -85,6 +88,9 @@ linters-settings: local-prefixes: github.com/prometheus/prometheus gofumpt: extra-rules: true + perfsprint: + # Optimizes `fmt.Errorf`. + errorf: false revive: # By default, revive will enable only the linting rules that are named in the configuration file. # So, it's needed to explicitly set in configuration all required rules. diff --git a/CHANGELOG.md b/CHANGELOG.md index f3b594844..71a2195d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ ## unreleased +* [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980 +* [FEATURE] Rules: Add new option `query_offset` for each rule group via rule group configuration file and `rule_query_offset` as part of the global configuration to have more resilience for remote write delays. #14061 +* [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974 +* [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991 +* [ENHANCEMENT] TSDB: Optimize querying with regexp matchers. #13620 +* [BUGFIX] OTLP: Don't generate target_info unless at least one identifying label is defined. #13991 +* [BUGFIX] OTLP: Don't generate target_info unless there are metrics. #13991 + ## 2.52.1 / 2024-05-29 * [BUGFIX] Linode SD: Fix partial fetch when discovery would return more than 500 elements. #14141 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7687826ba..9b1b286cc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -42,7 +42,12 @@ go build ./cmd/prometheus/ make test # Make sure all the tests pass before you commit and push :) ``` -We use [`golangci-lint`](https://github.com/golangci/golangci-lint) for linting the code. 
If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. Use this sparingly though, fixing the code to comply with the linter's recommendation is in general the preferred course of action. +To run a collection of Go linters through [`golangci-lint`](https://github.com/golangci/golangci-lint), do: +```bash +make lint +``` + +If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. Use this sparingly though, fixing the code to comply with the linter's recommendation is in general the preferred course of action. See [this section of the golangci-lint documentation](https://golangci-lint.run/usage/false-positives/#nolint-directive) for more information. All our issues are regularly tagged so that you can also filter down the issues involving the components you want to work on. For our labeling policy refer [the wiki page](https://github.com/prometheus/prometheus/wiki/Label-Names-and-Descriptions). diff --git a/Makefile b/Makefile index 61e8f4377..5dcebfd1a 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ TSDB_BENCHMARK_DATASET ?= ./tsdb/testdata/20kseries.json TSDB_BENCHMARK_OUTPUT_DIR ?= ./benchout GOLANGCI_LINT_OPTS ?= --timeout 4m +GOYACC_VERSION ?= v0.6.0 include Makefile.common @@ -78,24 +79,42 @@ assets-tarball: assets @echo '>> packaging assets' scripts/package_assets.sh -# We only want to generate the parser when there's changes to the grammar. .PHONY: parser parser: @echo ">> running goyacc to generate the .go file." ifeq (, $(shell command -v goyacc 2> /dev/null)) @echo "goyacc not installed so skipping" - @echo "To install: go install golang.org/x/tools/cmd/goyacc@v0.6.0" + @echo "To install: \"go install golang.org/x/tools/cmd/goyacc@$(GOYACC_VERSION)\" or run \"make install-goyacc\"" else - goyacc -l -o promql/parser/generated_parser.y.go promql/parser/generated_parser.y + $(MAKE) promql/parser/generated_parser.y.go endif +promql/parser/generated_parser.y.go: promql/parser/generated_parser.y + @echo ">> running goyacc to generate the .go file." + @goyacc -l -o promql/parser/generated_parser.y.go promql/parser/generated_parser.y + +.PHONY: clean-parser +clean-parser: + @echo ">> cleaning generated parser" + @rm -f promql/parser/generated_parser.y.go + +.PHONY: check-generated-parser +check-generated-parser: clean-parser promql/parser/generated_parser.y.go + @echo ">> checking generated parser" + @git diff --exit-code -- promql/parser/generated_parser.y.go || (echo "Generated parser is out of date. Please run 'make parser' and commit the changes." && false) + +.PHONY: install-goyacc +install-goyacc: + @echo ">> installing goyacc $(GOYACC_VERSION)" + @go install golang.org/x/tools/cmd/goyacc@$(GOYACC_VERSION) + .PHONY: test # If we only want to only test go code we have to change the test target # which is called by all. 
ifeq ($(GO_ONLY),1) test: common-test check-go-mod-version else -test: common-test ui-build-module ui-test ui-lint check-go-mod-version +test: check-generated-parser common-test ui-build-module ui-test ui-lint check-go-mod-version endif .PHONY: npm_licenses diff --git a/Makefile.common b/Makefile.common index 0acfb9d80..161729235 100644 --- a/Makefile.common +++ b/Makefile.common @@ -55,13 +55,13 @@ ifneq ($(shell command -v gotestsum 2> /dev/null),) endif endif -PROMU_VERSION ?= 0.15.0 +PROMU_VERSION ?= 0.17.0 PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz SKIP_GOLANGCI_LINT := GOLANGCI_LINT := GOLANGCI_LINT_OPTS ?= -GOLANGCI_LINT_VERSION ?= v1.56.2 +GOLANGCI_LINT_VERSION ?= v1.59.0 # golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64. # windows isn't included here because of the path separator being different. ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin)) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 0e15d5ca5..0532bc380 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -42,6 +42,7 @@ import ( "github.com/mwitkow/go-conntrack" "github.com/oklog/run" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version" "github.com/prometheus/common/model" "github.com/prometheus/common/promlog" @@ -217,6 +218,7 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error { level.Info(logger).Log("msg", "Experimental PromQL functions enabled.") case "native-histograms": c.tsdb.EnableNativeHistograms = true + c.scrape.EnableNativeHistogramsIngestion = true // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols @@ -251,6 +253,18 @@ func main() { newFlagRetentionDuration model.Duration ) + // Unregister the default GoCollector, and reregister with our defaults. + if prometheus.Unregister(collectors.NewGoCollector()) { + prometheus.MustRegister( + collectors.NewGoCollector( + collectors.WithGoCollectorRuntimeMetrics( + collectors.MetricsGC, + collectors.MetricsScheduler, + ), + ), + ) + } + cfg := flagConfig{ notifier: notifier.Options{ Registerer: prometheus.DefaultRegisterer, @@ -417,7 +431,7 @@ func main() { serverOnlyFlag(a, "rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager."). Default("1m").SetValue(&cfg.resendDelay) - serverOnlyFlag(a, "rules.max-concurrent-evals", "Global concurrency limit for independent rules that can run concurrently."). + serverOnlyFlag(a, "rules.max-concurrent-evals", "Global concurrency limit for independent rules that can run concurrently. When set, \"query.max-concurrency\" may need to be adjusted accordingly."). Default("4").Int64Var(&cfg.maxConcurrentEvals) a.Flag("scrape.adjust-timestamps", "Adjust scrape timestamps by up to `scrape.timestamp-tolerance` to align them to the intended schedule. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release."). 
@@ -771,6 +785,9 @@ func main() { ResendDelay: time.Duration(cfg.resendDelay), MaxConcurrentEvals: cfg.maxConcurrentEvals, ConcurrentEvalsEnabled: cfg.enableConcurrentRuleEval, + DefaultRuleQueryOffset: func() time.Duration { + return time.Duration(cfgFile.GlobalConfig.RuleQueryOffset) + }, }) } diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index 03f3a9bc3..89c171bb5 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -24,6 +24,7 @@ import ( "os/exec" "path/filepath" "runtime" + "strconv" "strings" "syscall" "testing" @@ -189,7 +190,7 @@ func TestSendAlerts(t *testing.T) { for i, tc := range testCases { tc := tc - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { senderFunc := senderFunc(func(alerts ...*notifier.Alert) { require.NotEmpty(t, tc.in, "sender called with 0 alert") require.Equal(t, tc.exp, alerts) diff --git a/cmd/prometheus/query_log_test.go b/cmd/prometheus/query_log_test.go index dd6b56672..9a7a3ed85 100644 --- a/cmd/prometheus/query_log_test.go +++ b/cmd/prometheus/query_log_test.go @@ -296,7 +296,7 @@ func (p *queryLogTest) run(t *testing.T) { if p.exactQueryCount() { require.Equal(t, 1, qc) } else { - require.Greater(t, qc, 0, "no queries logged") + require.Positive(t, qc, "no queries logged") } p.validateLastQuery(t, ql) @@ -366,7 +366,7 @@ func (p *queryLogTest) run(t *testing.T) { if p.exactQueryCount() { require.Equal(t, 1, qc) } else { - require.Greater(t, qc, 0, "no queries logged") + require.Positive(t, qc, "no queries logged") } } diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go index 601c3ced9..79db428c7 100644 --- a/cmd/promtool/backfill.go +++ b/cmd/promtool/backfill.go @@ -88,7 +88,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn blockDuration := getCompatibleBlockDuration(maxBlockDuration) mint = blockDuration * (mint / blockDuration) - db, err := tsdb.OpenDBReadOnly(outputDir, nil) + db, err := tsdb.OpenDBReadOnly(outputDir, "", nil) if err != nil { return err } diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index a62ae4fbf..e1d275e97 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -56,8 +56,8 @@ import ( "github.com/prometheus/prometheus/model/rulefmt" "github.com/prometheus/prometheus/notifier" _ "github.com/prometheus/prometheus/plugins" // Register plugins. - "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/util/documentcli" ) @@ -235,12 +235,14 @@ func main() { tsdbDumpCmd := tsdbCmd.Command("dump", "Dump samples from a TSDB.") dumpPath := tsdbDumpCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String() + dumpSandboxDirRoot := tsdbDumpCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String() dumpMinTime := tsdbDumpCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64() dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64() dumpMatch := tsdbDumpCmd.Flag("match", "Series selector. 
Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings() - tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics format. Native histograms are not dumped.") + tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics text format, excluding native histograms and staleness markers, which are not representable in OpenMetrics.") dumpOpenMetricsPath := tsdbDumpOpenMetricsCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String() + dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String() dumpOpenMetricsMinTime := tsdbDumpOpenMetricsCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64() dumpOpenMetricsMaxTime := tsdbDumpOpenMetricsCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64() dumpOpenMetricsMatch := tsdbDumpOpenMetricsCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings() @@ -377,7 +379,7 @@ func main() { case testRulesCmd.FullCommand(): os.Exit(RulesUnitTest( - promql.LazyLoaderOpts{ + promqltest.LazyLoaderOpts{ EnableAtModifier: true, EnableNegativeOffset: true, }, @@ -396,9 +398,9 @@ func main() { os.Exit(checkErr(listBlocks(*listPath, *listHumanReadable))) case tsdbDumpCmd.FullCommand(): - os.Exit(checkErr(dumpSamples(ctx, *dumpPath, *dumpMinTime, *dumpMaxTime, *dumpMatch, formatSeriesSet))) + os.Exit(checkErr(dumpSamples(ctx, *dumpPath, *dumpSandboxDirRoot, *dumpMinTime, *dumpMaxTime, *dumpMatch, formatSeriesSet))) case tsdbDumpOpenMetricsCmd.FullCommand(): - os.Exit(checkErr(dumpSamples(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics))) + os.Exit(checkErr(dumpSamples(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsSandboxDirRoot, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics))) // TODO(aSquare14): Work on adding support for custom block size. 
case openMetricsImportCmd.FullCommand(): os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration)) diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index 7306a3e64..78500fe93 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -25,6 +25,7 @@ import ( "os/exec" "path/filepath" "runtime" + "strconv" "strings" "syscall" "testing" @@ -410,7 +411,7 @@ func TestExitCodes(t *testing.T) { } { t.Run(c.file, func(t *testing.T) { for _, lintFatal := range []bool{true, false} { - t.Run(fmt.Sprintf("%t", lintFatal), func(t *testing.T) { + t.Run(strconv.FormatBool(lintFatal), func(t *testing.T) { args := []string{"-test.main", "check", "config", "testdata/" + c.file} if lintFatal { args = append(args, "--lint-fatal") diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index 2ad969438..2ed7244b1 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -338,7 +338,7 @@ func readPrometheusLabels(r io.Reader, n int) ([]labels.Labels, error) { } func listBlocks(path string, humanReadable bool) error { - db, err := tsdb.OpenDBReadOnly(path, nil) + db, err := tsdb.OpenDBReadOnly(path, "", nil) if err != nil { return err } @@ -393,7 +393,7 @@ func getFormatedBytes(bytes int64, humanReadable bool) string { } func openBlock(path, blockID string) (*tsdb.DBReadOnly, tsdb.BlockReader, error) { - db, err := tsdb.OpenDBReadOnly(path, nil) + db, err := tsdb.OpenDBReadOnly(path, "", nil) if err != nil { return nil, nil, err } @@ -708,8 +708,8 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb. type SeriesSetFormatter func(series storage.SeriesSet) error -func dumpSamples(ctx context.Context, path string, mint, maxt int64, match []string, formatter SeriesSetFormatter) (err error) { - db, err := tsdb.OpenDBReadOnly(path, nil) +func dumpSamples(ctx context.Context, dbDir, sandboxDirRoot string, mint, maxt int64, match []string, formatter SeriesSetFormatter) (err error) { + db, err := tsdb.OpenDBReadOnly(dbDir, sandboxDirRoot, nil) if err != nil { return err } @@ -856,9 +856,9 @@ func displayHistogram(dataType string, datas []int, total int) { } avg := sum / len(datas) fmt.Printf("%s (min/avg/max): %d/%d/%d\n", dataType, datas[0], avg, datas[len(datas)-1]) - maxLeftLen := strconv.Itoa(len(fmt.Sprintf("%d", end))) - maxRightLen := strconv.Itoa(len(fmt.Sprintf("%d", end+step))) - maxCountLen := strconv.Itoa(len(fmt.Sprintf("%d", maxCount))) + maxLeftLen := strconv.Itoa(len(strconv.Itoa(end))) + maxRightLen := strconv.Itoa(len(strconv.Itoa(end + step))) + maxCountLen := strconv.Itoa(len(strconv.Itoa(maxCount))) for bucket, count := range buckets { percentage := 100.0 * count / total fmt.Printf("[%"+maxLeftLen+"d, %"+maxRightLen+"d]: %"+maxCountLen+"d %s\n", bucket*step+start+1, (bucket+1)*step+start, count, strings.Repeat("#", percentage)) diff --git a/cmd/promtool/tsdb_test.go b/cmd/promtool/tsdb_test.go index 36a65d73e..75089b168 100644 --- a/cmd/promtool/tsdb_test.go +++ b/cmd/promtool/tsdb_test.go @@ -26,7 +26,7 @@ import ( "github.com/stretchr/testify/require" - "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/tsdb" ) @@ -64,6 +64,7 @@ func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []strin err := dumpSamples( context.Background(), path, + t.TempDir(), mint, maxt, match, @@ -88,7 +89,7 @@ func normalizeNewLine(b []byte) []byte { } func TestTSDBDump(t *testing.T) { - storage := 
promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m metric{foo="bar", baz="abc"} 1 2 3 4 5 heavy_metric{foo="bar"} 5 4 3 2 1 @@ -158,7 +159,7 @@ func TestTSDBDump(t *testing.T) { } func TestTSDBDumpOpenMetrics(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m my_counter{foo="bar", baz="abc"} 1 2 3 4 5 my_gauge{bar="foo", abc="baz"} 9 8 0 4 7 diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index 6d6683a93..5451c5296 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -36,13 +36,14 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/storage" ) // RulesUnitTest does unit testing of rules based on the unit testing files provided. // More info about the file format can be found in the docs. -func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { +func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { failed := false var run *regexp.Regexp @@ -69,7 +70,7 @@ func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, diffFla return successExitCode } -func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool) []error { +func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool) []error { fmt.Println("Unit Testing: ", filename) b, err := os.ReadFile(filename) @@ -175,9 +176,9 @@ type testGroup struct { } // test performs the unit tests. -func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promql.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) (outErr []error) { +func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) (outErr []error) { // Setup testing suite. 
- suite, err := promql.NewLazyLoader(tg.seriesLoadingString(), queryOpts) + suite, err := promqltest.NewLazyLoader(tg.seriesLoadingString(), queryOpts) if err != nil { return []error{err} } @@ -413,7 +414,7 @@ Outer: gotSamples = append(gotSamples, parsedSample{ Labels: s.Metric.Copy(), Value: s.F, - Histogram: promql.HistogramTestExpression(s.H), + Histogram: promqltest.HistogramTestExpression(s.H), }) } @@ -443,7 +444,7 @@ Outer: expSamples = append(expSamples, parsedSample{ Labels: lb, Value: s.Value, - Histogram: promql.HistogramTestExpression(hist), + Histogram: promqltest.HistogramTestExpression(hist), }) } @@ -572,7 +573,7 @@ func (la labelsAndAnnotations) String() string { } s := "[\n0:" + indentLines("\n"+la[0].String(), " ") for i, l := range la[1:] { - s += ",\n" + fmt.Sprintf("%d", i+1) + ":" + indentLines("\n"+l.String(), " ") + s += ",\n" + strconv.Itoa(i+1) + ":" + indentLines("\n"+l.String(), " ") } s += "\n]" diff --git a/cmd/promtool/unittest_test.go b/cmd/promtool/unittest_test.go index 971ddb40c..2dbd5a4e5 100644 --- a/cmd/promtool/unittest_test.go +++ b/cmd/promtool/unittest_test.go @@ -18,7 +18,7 @@ import ( "github.com/stretchr/testify/require" - "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/promqltest" ) func TestRulesUnitTest(t *testing.T) { @@ -28,7 +28,7 @@ func TestRulesUnitTest(t *testing.T) { tests := []struct { name string args args - queryOpts promql.LazyLoaderOpts + queryOpts promqltest.LazyLoaderOpts want int }{ { @@ -92,7 +92,7 @@ func TestRulesUnitTest(t *testing.T) { args: args{ files: []string{"./testdata/at-modifier-test.yml"}, }, - queryOpts: promql.LazyLoaderOpts{ + queryOpts: promqltest.LazyLoaderOpts{ EnableAtModifier: true, }, want: 0, @@ -109,7 +109,7 @@ func TestRulesUnitTest(t *testing.T) { args: args{ files: []string{"./testdata/negative-offset-test.yml"}, }, - queryOpts: promql.LazyLoaderOpts{ + queryOpts: promqltest.LazyLoaderOpts{ EnableNegativeOffset: true, }, want: 0, @@ -119,7 +119,7 @@ func TestRulesUnitTest(t *testing.T) { args: args{ files: []string{"./testdata/no-test-group-interval.yml"}, }, - queryOpts: promql.LazyLoaderOpts{ + queryOpts: promqltest.LazyLoaderOpts{ EnableNegativeOffset: true, }, want: 0, @@ -142,7 +142,7 @@ func TestRulesUnitTestRun(t *testing.T) { tests := []struct { name string args args - queryOpts promql.LazyLoaderOpts + queryOpts promqltest.LazyLoaderOpts want int }{ { diff --git a/config/config.go b/config/config.go index 1cfd58864..463dbc357 100644 --- a/config/config.go +++ b/config/config.go @@ -145,6 +145,7 @@ var ( ScrapeInterval: model.Duration(1 * time.Minute), ScrapeTimeout: model.Duration(10 * time.Second), EvaluationInterval: model.Duration(1 * time.Minute), + RuleQueryOffset: model.Duration(0 * time.Minute), // When native histogram feature flag is enabled, ScrapeProtocols default // changes to DefaultNativeHistogramScrapeProtocols. ScrapeProtocols: DefaultScrapeProtocols, @@ -397,6 +398,8 @@ type GlobalConfig struct { ScrapeProtocols []ScrapeProtocol `yaml:"scrape_protocols,omitempty"` // How frequently to evaluate rules by default. EvaluationInterval model.Duration `yaml:"evaluation_interval,omitempty"` + // Offset the rule evaluation timestamp of this particular group by the specified duration into the past to ensure the underlying metrics have been received. + RuleQueryOffset model.Duration `yaml:"rule_query_offset"` // File to which PromQL queries are logged. 
QueryLogFile string `yaml:"query_log_file,omitempty"` // The labels to add to any timeseries that this Prometheus instance scrapes. @@ -556,6 +559,7 @@ func (c *GlobalConfig) isZero() bool { c.ScrapeInterval == 0 && c.ScrapeTimeout == 0 && c.EvaluationInterval == 0 && + c.RuleQueryOffset == 0 && c.QueryLogFile == "" && c.ScrapeProtocols == nil } diff --git a/discovery/aws/ec2.go b/discovery/aws/ec2.go index aa79fd9c6..a6a0a8257 100644 --- a/discovery/aws/ec2.go +++ b/discovery/aws/ec2.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "net" + "strconv" "strings" "time" @@ -279,7 +280,7 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error if inst.PrivateDnsName != nil { labels[ec2LabelPrivateDNS] = model.LabelValue(*inst.PrivateDnsName) } - addr := net.JoinHostPort(*inst.PrivateIpAddress, fmt.Sprintf("%d", d.cfg.Port)) + addr := net.JoinHostPort(*inst.PrivateIpAddress, strconv.Itoa(d.cfg.Port)) labels[model.AddressLabel] = model.LabelValue(addr) if inst.Platform != nil { diff --git a/discovery/aws/lightsail.go b/discovery/aws/lightsail.go index 86b138be5..0ad7f2d54 100644 --- a/discovery/aws/lightsail.go +++ b/discovery/aws/lightsail.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "net" + "strconv" "strings" "time" @@ -229,7 +230,7 @@ func (d *LightsailDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, lightsailLabelRegion: model.LabelValue(d.cfg.Region), } - addr := net.JoinHostPort(*inst.PrivateIpAddress, fmt.Sprintf("%d", d.cfg.Port)) + addr := net.JoinHostPort(*inst.PrivateIpAddress, strconv.Itoa(d.cfg.Port)) labels[model.AddressLabel] = model.LabelValue(addr) if inst.PublicIpAddress != nil { diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 7c2ece2c7..70d95b9f3 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -20,6 +20,7 @@ import ( "math/rand" "net" "net/http" + "strconv" "strings" "sync" "time" @@ -492,7 +493,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM } if ip.Properties != nil && ip.Properties.PrivateIPAddress != nil { labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.Properties.PrivateIPAddress) - address := net.JoinHostPort(*ip.Properties.PrivateIPAddress, fmt.Sprintf("%d", d.port)) + address := net.JoinHostPort(*ip.Properties.PrivateIPAddress, strconv.Itoa(d.port)) labels[model.AddressLabel] = model.LabelValue(address) return labels, nil } diff --git a/discovery/consul/consul.go b/discovery/consul/consul.go index 40eed7697..bdc1fc8dc 100644 --- a/discovery/consul/consul.go +++ b/discovery/consul/consul.go @@ -539,9 +539,9 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr // since the service may be registered remotely through a different node. 
var addr string if serviceNode.Service.Address != "" { - addr = net.JoinHostPort(serviceNode.Service.Address, fmt.Sprintf("%d", serviceNode.Service.Port)) + addr = net.JoinHostPort(serviceNode.Service.Address, strconv.Itoa(serviceNode.Service.Port)) } else { - addr = net.JoinHostPort(serviceNode.Node.Address, fmt.Sprintf("%d", serviceNode.Service.Port)) + addr = net.JoinHostPort(serviceNode.Node.Address, strconv.Itoa(serviceNode.Service.Port)) } labels := model.LabelSet{ diff --git a/discovery/digitalocean/digitalocean.go b/discovery/digitalocean/digitalocean.go index 18380b729..ecee60cb1 100644 --- a/discovery/digitalocean/digitalocean.go +++ b/discovery/digitalocean/digitalocean.go @@ -177,7 +177,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { } labels := model.LabelSet{ - doLabelID: model.LabelValue(fmt.Sprintf("%d", droplet.ID)), + doLabelID: model.LabelValue(strconv.Itoa(droplet.ID)), doLabelName: model.LabelValue(droplet.Name), doLabelImage: model.LabelValue(droplet.Image.Slug), doLabelImageName: model.LabelValue(droplet.Image.Name), diff --git a/discovery/dns/dns.go b/discovery/dns/dns.go index cf56a2ad0..314c3d38c 100644 --- a/discovery/dns/dns.go +++ b/discovery/dns/dns.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "net" + "strconv" "strings" "sync" "time" @@ -200,7 +201,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ tg := &targetgroup.Group{} hostPort := func(a string, p int) model.LabelValue { - return model.LabelValue(net.JoinHostPort(a, fmt.Sprintf("%d", p))) + return model.LabelValue(net.JoinHostPort(a, strconv.Itoa(p))) } for _, record := range response.Answer { @@ -209,7 +210,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ switch addr := record.(type) { case *dns.SRV: dnsSrvRecordTarget = model.LabelValue(addr.Target) - dnsSrvRecordPort = model.LabelValue(fmt.Sprintf("%d", addr.Port)) + dnsSrvRecordPort = model.LabelValue(strconv.Itoa(int(addr.Port))) // Remove the final dot from rooted DNS names to make them look more usual. 
addr.Target = strings.TrimRight(addr.Target, ".") diff --git a/discovery/hetzner/hcloud.go b/discovery/hetzner/hcloud.go index 6d0599dfa..df56f94c5 100644 --- a/discovery/hetzner/hcloud.go +++ b/discovery/hetzner/hcloud.go @@ -15,7 +15,6 @@ package hetzner import ( "context" - "fmt" "net" "net/http" "strconv" @@ -92,7 +91,7 @@ func (d *hcloudDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er for i, server := range servers { labels := model.LabelSet{ hetznerLabelRole: model.LabelValue(HetznerRoleHcloud), - hetznerLabelServerID: model.LabelValue(fmt.Sprintf("%d", server.ID)), + hetznerLabelServerID: model.LabelValue(strconv.FormatInt(server.ID, 10)), hetznerLabelServerName: model.LabelValue(server.Name), hetznerLabelDatacenter: model.LabelValue(server.Datacenter.Name), hetznerLabelPublicIPv4: model.LabelValue(server.PublicNet.IPv4.IP.String()), @@ -102,10 +101,10 @@ func (d *hcloudDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er hetznerLabelHcloudDatacenterLocation: model.LabelValue(server.Datacenter.Location.Name), hetznerLabelHcloudDatacenterLocationNetworkZone: model.LabelValue(server.Datacenter.Location.NetworkZone), hetznerLabelHcloudType: model.LabelValue(server.ServerType.Name), - hetznerLabelHcloudCPUCores: model.LabelValue(fmt.Sprintf("%d", server.ServerType.Cores)), + hetznerLabelHcloudCPUCores: model.LabelValue(strconv.Itoa(server.ServerType.Cores)), hetznerLabelHcloudCPUType: model.LabelValue(server.ServerType.CPUType), - hetznerLabelHcloudMemoryGB: model.LabelValue(fmt.Sprintf("%d", int(server.ServerType.Memory))), - hetznerLabelHcloudDiskGB: model.LabelValue(fmt.Sprintf("%d", server.ServerType.Disk)), + hetznerLabelHcloudMemoryGB: model.LabelValue(strconv.Itoa(int(server.ServerType.Memory))), + hetznerLabelHcloudDiskGB: model.LabelValue(strconv.Itoa(server.ServerType.Disk)), model.AddressLabel: model.LabelValue(net.JoinHostPort(server.PublicNet.IPv4.IP.String(), strconv.FormatUint(uint64(d.port), 10))), } diff --git a/discovery/hetzner/robot.go b/discovery/hetzner/robot.go index b862c33f5..516470b05 100644 --- a/discovery/hetzner/robot.go +++ b/discovery/hetzner/robot.go @@ -112,7 +112,7 @@ func (d *robotDiscovery) refresh(context.Context) ([]*targetgroup.Group, error) hetznerLabelPublicIPv4: model.LabelValue(server.Server.ServerIP), hetznerLabelServerStatus: model.LabelValue(server.Server.Status), hetznerLabelRobotProduct: model.LabelValue(server.Server.Product), - hetznerLabelRobotCancelled: model.LabelValue(fmt.Sprintf("%t", server.Server.Canceled)), + hetznerLabelRobotCancelled: model.LabelValue(strconv.FormatBool(server.Server.Canceled)), model.AddressLabel: model.LabelValue(net.JoinHostPort(server.Server.ServerIP, strconv.FormatUint(uint64(d.port), 10))), } diff --git a/discovery/legacymanager/manager_test.go b/discovery/legacymanager/manager_test.go index 1ed699645..a455a8e34 100644 --- a/discovery/legacymanager/manager_test.go +++ b/discovery/legacymanager/manager_test.go @@ -720,7 +720,7 @@ func staticConfig(addrs ...string) discovery.StaticConfig { var cfg discovery.StaticConfig for i, addr := range addrs { cfg = append(cfg, &targetgroup.Group{ - Source: fmt.Sprint(i), + Source: strconv.Itoa(i), Targets: []model.LabelSet{ {model.AddressLabel: model.LabelValue(addr)}, }, diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go index e2210fe2c..634a6b1d4 100644 --- a/discovery/linode/linode.go +++ b/discovery/linode/linode.go @@ -334,7 +334,7 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro } 
labels := model.LabelSet{ - linodeLabelID: model.LabelValue(fmt.Sprintf("%d", instance.ID)), + linodeLabelID: model.LabelValue(strconv.Itoa(instance.ID)), linodeLabelName: model.LabelValue(instance.Label), linodeLabelImage: model.LabelValue(instance.Image), linodeLabelPrivateIPv4: model.LabelValue(privateIPv4), @@ -347,13 +347,13 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro linodeLabelType: model.LabelValue(instance.Type), linodeLabelStatus: model.LabelValue(instance.Status), linodeLabelGroup: model.LabelValue(instance.Group), - linodeLabelGPUs: model.LabelValue(fmt.Sprintf("%d", instance.Specs.GPUs)), + linodeLabelGPUs: model.LabelValue(strconv.Itoa(instance.Specs.GPUs)), linodeLabelHypervisor: model.LabelValue(instance.Hypervisor), linodeLabelBackups: model.LabelValue(backupsStatus), - linodeLabelSpecsDiskBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Disk)<<20)), - linodeLabelSpecsMemoryBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Memory)<<20)), - linodeLabelSpecsVCPUs: model.LabelValue(fmt.Sprintf("%d", instance.Specs.VCPUs)), - linodeLabelSpecsTransferBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Transfer)<<20)), + linodeLabelSpecsDiskBytes: model.LabelValue(strconv.FormatInt(int64(instance.Specs.Disk)<<20, 10)), + linodeLabelSpecsMemoryBytes: model.LabelValue(strconv.FormatInt(int64(instance.Specs.Memory)<<20, 10)), + linodeLabelSpecsVCPUs: model.LabelValue(strconv.Itoa(instance.Specs.VCPUs)), + linodeLabelSpecsTransferBytes: model.LabelValue(strconv.FormatInt(int64(instance.Specs.Transfer)<<20, 10)), } addr := net.JoinHostPort(publicIPv4, strconv.FormatUint(uint64(d.port), 10)) diff --git a/discovery/manager_test.go b/discovery/manager_test.go index 656d7c3c6..be07edbdb 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -720,7 +720,7 @@ func staticConfig(addrs ...string) StaticConfig { var cfg StaticConfig for i, addr := range addrs { cfg = append(cfg, &targetgroup.Group{ - Source: fmt.Sprint(i), + Source: strconv.Itoa(i), Targets: []model.LabelSet{ {model.AddressLabel: model.LabelValue(addr)}, }, diff --git a/discovery/marathon/marathon.go b/discovery/marathon/marathon.go index 3e9e15967..38b47accf 100644 --- a/discovery/marathon/marathon.go +++ b/discovery/marathon/marathon.go @@ -505,7 +505,7 @@ func targetEndpoint(task *task, port uint32, containerNet bool) string { host = task.Host } - return net.JoinHostPort(host, fmt.Sprintf("%d", port)) + return net.JoinHostPort(host, strconv.Itoa(int(port))) } // Get a list of ports and a list of labels from a PortMapping. diff --git a/discovery/metrics.go b/discovery/metrics.go index e738331a1..356be1ddc 100644 --- a/discovery/metrics.go +++ b/discovery/metrics.go @@ -19,16 +19,6 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -var ( - clientGoRequestMetrics = &clientGoRequestMetricAdapter{} - clientGoWorkloadMetrics = &clientGoWorkqueueMetricsProvider{} -) - -func init() { - clientGoRequestMetrics.RegisterWithK8sGoClient() - clientGoWorkloadMetrics.RegisterWithK8sGoClient() -} - // Metrics to be used with a discovery manager. 
type Metrics struct { FailedConfigs prometheus.Gauge diff --git a/discovery/metrics_k8s_client.go b/discovery/metrics_k8s_client.go index f16245684..c13ce5331 100644 --- a/discovery/metrics_k8s_client.go +++ b/discovery/metrics_k8s_client.go @@ -35,6 +35,11 @@ const ( workqueueMetricsNamespace = KubernetesMetricsNamespace + "_workqueue" ) +var ( + clientGoRequestMetrics = &clientGoRequestMetricAdapter{} + clientGoWorkloadMetrics = &clientGoWorkqueueMetricsProvider{} +) + var ( // Metrics for client-go's HTTP requests. clientGoRequestResultMetricVec = prometheus.NewCounterVec( @@ -135,6 +140,9 @@ func clientGoMetrics() []prometheus.Collector { } func RegisterK8sClientMetricsWithPrometheus(registerer prometheus.Registerer) error { + clientGoRequestMetrics.RegisterWithK8sGoClient() + clientGoWorkloadMetrics.RegisterWithK8sGoClient() + for _, collector := range clientGoMetrics() { err := registerer.Register(collector) if err != nil { diff --git a/discovery/moby/network.go b/discovery/moby/network.go index 0e0d0041d..794d2e607 100644 --- a/discovery/moby/network.go +++ b/discovery/moby/network.go @@ -15,7 +15,7 @@ package moby import ( "context" - "fmt" + "strconv" "github.com/docker/docker/api/types" "github.com/docker/docker/client" @@ -44,8 +44,8 @@ func getNetworksLabels(ctx context.Context, client *client.Client, labelPrefix s labelPrefix + labelNetworkID: network.ID, labelPrefix + labelNetworkName: network.Name, labelPrefix + labelNetworkScope: network.Scope, - labelPrefix + labelNetworkInternal: fmt.Sprintf("%t", network.Internal), - labelPrefix + labelNetworkIngress: fmt.Sprintf("%t", network.Ingress), + labelPrefix + labelNetworkInternal: strconv.FormatBool(network.Internal), + labelPrefix + labelNetworkIngress: strconv.FormatBool(network.Ingress), } for k, v := range network.Labels { ln := strutil.SanitizeLabelName(k) diff --git a/discovery/moby/nodes.go b/discovery/moby/nodes.go index a7c5551c0..b5be844ed 100644 --- a/discovery/moby/nodes.go +++ b/discovery/moby/nodes.go @@ -66,7 +66,7 @@ func (d *Discovery) refreshNodes(ctx context.Context) ([]*targetgroup.Group, err swarmLabelNodeAddress: model.LabelValue(n.Status.Addr), } if n.ManagerStatus != nil { - labels[swarmLabelNodeManagerLeader] = model.LabelValue(fmt.Sprintf("%t", n.ManagerStatus.Leader)) + labels[swarmLabelNodeManagerLeader] = model.LabelValue(strconv.FormatBool(n.ManagerStatus.Leader)) labels[swarmLabelNodeManagerReachability] = model.LabelValue(n.ManagerStatus.Reachability) labels[swarmLabelNodeManagerAddr] = model.LabelValue(n.ManagerStatus.Addr) } diff --git a/discovery/moby/services.go b/discovery/moby/services.go index 1d472b5c0..c61b49925 100644 --- a/discovery/moby/services.go +++ b/discovery/moby/services.go @@ -116,7 +116,7 @@ func (d *Discovery) refreshServices(ctx context.Context) ([]*targetgroup.Group, labels[model.LabelName(k)] = model.LabelValue(v) } - addr := net.JoinHostPort(ip.String(), fmt.Sprintf("%d", d.port)) + addr := net.JoinHostPort(ip.String(), strconv.Itoa(d.port)) labels[model.AddressLabel] = model.LabelValue(addr) tg.Targets = append(tg.Targets, labels) diff --git a/discovery/moby/tasks.go b/discovery/moby/tasks.go index 2505a7b07..38b9d33de 100644 --- a/discovery/moby/tasks.go +++ b/discovery/moby/tasks.go @@ -150,7 +150,7 @@ func (d *Discovery) refreshTasks(ctx context.Context) ([]*targetgroup.Group, err labels[model.LabelName(k)] = model.LabelValue(v) } - addr := net.JoinHostPort(ip.String(), fmt.Sprintf("%d", d.port)) + addr := net.JoinHostPort(ip.String(), strconv.Itoa(d.port)) 
labels[model.AddressLabel] = model.LabelValue(addr) tg.Targets = append(tg.Targets, labels) diff --git a/discovery/openstack/hypervisor.go b/discovery/openstack/hypervisor.go index 16964cfb6..8964da929 100644 --- a/discovery/openstack/hypervisor.go +++ b/discovery/openstack/hypervisor.go @@ -17,6 +17,7 @@ import ( "context" "fmt" "net" + "strconv" "github.com/go-kit/log" "github.com/gophercloud/gophercloud" @@ -72,7 +73,7 @@ func (h *HypervisorDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group } tg := &targetgroup.Group{ - Source: fmt.Sprintf("OS_" + h.region), + Source: "OS_" + h.region, } // OpenStack API reference // https://developer.openstack.org/api-ref/compute/#list-hypervisors-details @@ -84,7 +85,7 @@ func (h *HypervisorDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group } for _, hypervisor := range hypervisorList { labels := model.LabelSet{} - addr := net.JoinHostPort(hypervisor.HostIP, fmt.Sprintf("%d", h.port)) + addr := net.JoinHostPort(hypervisor.HostIP, strconv.Itoa(h.port)) labels[model.AddressLabel] = model.LabelValue(addr) labels[openstackLabelHypervisorID] = model.LabelValue(hypervisor.ID) labels[openstackLabelHypervisorHostName] = model.LabelValue(hypervisor.HypervisorHostname) diff --git a/discovery/openstack/instance.go b/discovery/openstack/instance.go index 9b28c1d6e..750d414a2 100644 --- a/discovery/openstack/instance.go +++ b/discovery/openstack/instance.go @@ -17,6 +17,7 @@ import ( "context" "fmt" "net" + "strconv" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -120,7 +121,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, } pager := servers.List(client, opts) tg := &targetgroup.Group{ - Source: fmt.Sprintf("OS_" + i.region), + Source: "OS_" + i.region, } err = pager.EachPage(func(page pagination.Page) (bool, error) { if ctx.Err() != nil { @@ -194,7 +195,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, if val, ok := floatingIPList[floatingIPKey{id: s.ID, fixed: addr}]; ok { lbls[openstackLabelPublicIP] = model.LabelValue(val) } - addr = net.JoinHostPort(addr, fmt.Sprintf("%d", i.port)) + addr = net.JoinHostPort(addr, strconv.Itoa(i.port)) lbls[model.AddressLabel] = model.LabelValue(addr) tg.Targets = append(tg.Targets, lbls) diff --git a/discovery/ovhcloud/dedicated_server.go b/discovery/ovhcloud/dedicated_server.go index bb5dadcd7..a70857a08 100644 --- a/discovery/ovhcloud/dedicated_server.go +++ b/discovery/ovhcloud/dedicated_server.go @@ -144,12 +144,12 @@ func (d *dedicatedServerDiscovery) refresh(context.Context) ([]*targetgroup.Grou model.InstanceLabel: model.LabelValue(server.Name), dedicatedServerLabelPrefix + "state": model.LabelValue(server.State), dedicatedServerLabelPrefix + "commercial_range": model.LabelValue(server.CommercialRange), - dedicatedServerLabelPrefix + "link_speed": model.LabelValue(fmt.Sprintf("%d", server.LinkSpeed)), + dedicatedServerLabelPrefix + "link_speed": model.LabelValue(strconv.Itoa(server.LinkSpeed)), dedicatedServerLabelPrefix + "rack": model.LabelValue(server.Rack), dedicatedServerLabelPrefix + "no_intervention": model.LabelValue(strconv.FormatBool(server.NoIntervention)), dedicatedServerLabelPrefix + "os": model.LabelValue(server.Os), dedicatedServerLabelPrefix + "support_level": model.LabelValue(server.SupportLevel), - dedicatedServerLabelPrefix + "server_id": model.LabelValue(fmt.Sprintf("%d", server.ServerID)), + dedicatedServerLabelPrefix + "server_id": model.LabelValue(strconv.FormatInt(server.ServerID, 10)), 
dedicatedServerLabelPrefix + "reverse": model.LabelValue(server.Reverse), dedicatedServerLabelPrefix + "datacenter": model.LabelValue(server.Datacenter), dedicatedServerLabelPrefix + "name": model.LabelValue(server.Name), diff --git a/discovery/ovhcloud/vps.go b/discovery/ovhcloud/vps.go index e2d1dee36..58ceeabd8 100644 --- a/discovery/ovhcloud/vps.go +++ b/discovery/ovhcloud/vps.go @@ -19,6 +19,7 @@ import ( "net/netip" "net/url" "path" + "strconv" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -161,21 +162,21 @@ func (d *vpsDiscovery) refresh(context.Context) ([]*targetgroup.Group, error) { model.InstanceLabel: model.LabelValue(server.Name), vpsLabelPrefix + "offer": model.LabelValue(server.Model.Offer), vpsLabelPrefix + "datacenter": model.LabelValue(fmt.Sprintf("%+v", server.Model.Datacenter)), - vpsLabelPrefix + "model_vcore": model.LabelValue(fmt.Sprintf("%d", server.Model.Vcore)), - vpsLabelPrefix + "maximum_additional_ip": model.LabelValue(fmt.Sprintf("%d", server.Model.MaximumAdditionalIP)), + vpsLabelPrefix + "model_vcore": model.LabelValue(strconv.Itoa(server.Model.Vcore)), + vpsLabelPrefix + "maximum_additional_ip": model.LabelValue(strconv.Itoa(server.Model.MaximumAdditionalIP)), vpsLabelPrefix + "version": model.LabelValue(server.Model.Version), vpsLabelPrefix + "model_name": model.LabelValue(server.Model.Name), - vpsLabelPrefix + "disk": model.LabelValue(fmt.Sprintf("%d", server.Model.Disk)), - vpsLabelPrefix + "memory": model.LabelValue(fmt.Sprintf("%d", server.Model.Memory)), + vpsLabelPrefix + "disk": model.LabelValue(strconv.Itoa(server.Model.Disk)), + vpsLabelPrefix + "memory": model.LabelValue(strconv.Itoa(server.Model.Memory)), vpsLabelPrefix + "zone": model.LabelValue(server.Zone), vpsLabelPrefix + "display_name": model.LabelValue(server.DisplayName), vpsLabelPrefix + "cluster": model.LabelValue(server.Cluster), vpsLabelPrefix + "state": model.LabelValue(server.State), vpsLabelPrefix + "name": model.LabelValue(server.Name), vpsLabelPrefix + "netboot_mode": model.LabelValue(server.NetbootMode), - vpsLabelPrefix + "memory_limit": model.LabelValue(fmt.Sprintf("%d", server.MemoryLimit)), + vpsLabelPrefix + "memory_limit": model.LabelValue(strconv.Itoa(server.MemoryLimit)), vpsLabelPrefix + "offer_type": model.LabelValue(server.OfferType), - vpsLabelPrefix + "vcore": model.LabelValue(fmt.Sprintf("%d", server.Vcore)), + vpsLabelPrefix + "vcore": model.LabelValue(strconv.Itoa(server.Vcore)), vpsLabelPrefix + "ipv4": model.LabelValue(ipv4), vpsLabelPrefix + "ipv6": model.LabelValue(ipv6), } diff --git a/discovery/puppetdb/puppetdb.go b/discovery/puppetdb/puppetdb.go index 8c9ccde0a..8f89acbf9 100644 --- a/discovery/puppetdb/puppetdb.go +++ b/discovery/puppetdb/puppetdb.go @@ -237,7 +237,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { pdbLabelResource: model.LabelValue(resource.Resource), pdbLabelType: model.LabelValue(resource.Type), pdbLabelTitle: model.LabelValue(resource.Title), - pdbLabelExported: model.LabelValue(fmt.Sprintf("%t", resource.Exported)), + pdbLabelExported: model.LabelValue(strconv.FormatBool(resource.Exported)), pdbLabelFile: model.LabelValue(resource.File), pdbLabelEnvironment: model.LabelValue(resource.Environment), } diff --git a/discovery/scaleway/instance.go b/discovery/scaleway/instance.go index 9dd786c80..6540f06dc 100644 --- a/discovery/scaleway/instance.go +++ b/discovery/scaleway/instance.go @@ -174,20 +174,25 @@ func (d *instanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, 
labels[instanceTagsLabel] = model.LabelValue(tags) } + addr := "" if server.IPv6 != nil { labels[instancePublicIPv6Label] = model.LabelValue(server.IPv6.Address.String()) + addr = server.IPv6.Address.String() } if server.PublicIP != nil { labels[instancePublicIPv4Label] = model.LabelValue(server.PublicIP.Address.String()) + addr = server.PublicIP.Address.String() } if server.PrivateIP != nil { labels[instancePrivateIPv4Label] = model.LabelValue(*server.PrivateIP) + addr = *server.PrivateIP + } - addr := net.JoinHostPort(*server.PrivateIP, strconv.FormatUint(uint64(d.port), 10)) + if addr != "" { + addr := net.JoinHostPort(addr, strconv.FormatUint(uint64(d.port), 10)) labels[model.AddressLabel] = model.LabelValue(addr) - targets = append(targets, labels) } } diff --git a/discovery/scaleway/instance_test.go b/discovery/scaleway/instance_test.go index d2449d00c..ae70a9ed2 100644 --- a/discovery/scaleway/instance_test.go +++ b/discovery/scaleway/instance_test.go @@ -60,7 +60,7 @@ api_url: %s tg := tgs[0] require.NotNil(t, tg) require.NotNil(t, tg.Targets) - require.Len(t, tg.Targets, 2) + require.Len(t, tg.Targets, 3) for i, lbls := range []model.LabelSet{ { @@ -110,6 +110,28 @@ api_url: %s "__meta_scaleway_instance_type": "DEV1-S", "__meta_scaleway_instance_zone": "fr-par-1", }, + { + "__address__": "51.158.183.115:80", + "__meta_scaleway_instance_boot_type": "local", + "__meta_scaleway_instance_hostname": "routed-dualstack", + "__meta_scaleway_instance_id": "4904366a-7e26-4b65-b97b-6392c761247a", + "__meta_scaleway_instance_image_arch": "x86_64", + "__meta_scaleway_instance_image_id": "3e0a5b84-1d69-4993-8fa4-0d7df52d5160", + "__meta_scaleway_instance_image_name": "Ubuntu 22.04 Jammy Jellyfish", + "__meta_scaleway_instance_location_cluster_id": "19", + "__meta_scaleway_instance_location_hypervisor_id": "1201", + "__meta_scaleway_instance_location_node_id": "24", + "__meta_scaleway_instance_name": "routed-dualstack", + "__meta_scaleway_instance_organization_id": "20b3d507-96ac-454c-a795-bc731b46b12f", + "__meta_scaleway_instance_project_id": "20b3d507-96ac-454c-a795-bc731b46b12f", + "__meta_scaleway_instance_public_ipv4": "51.158.183.115", + "__meta_scaleway_instance_region": "nl-ams", + "__meta_scaleway_instance_security_group_id": "984414da-9fc2-49c0-a925-fed6266fe092", + "__meta_scaleway_instance_security_group_name": "Default security group", + "__meta_scaleway_instance_status": "running", + "__meta_scaleway_instance_type": "DEV1-S", + "__meta_scaleway_instance_zone": "nl-ams-1", + }, } { t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) { require.Equal(t, lbls, tg.Targets[i]) diff --git a/discovery/scaleway/testdata/instance.json b/discovery/scaleway/testdata/instance.json index f8d35b215..b433f7598 100644 --- a/discovery/scaleway/testdata/instance.json +++ b/discovery/scaleway/testdata/instance.json @@ -216,6 +216,146 @@ "placement_group": null, "private_nics": [], "zone": "fr-par-1" + }, + { + "id": "4904366a-7e26-4b65-b97b-6392c761247a", + "name": "routed-dualstack", + "arch": "x86_64", + "commercial_type": "DEV1-S", + "boot_type": "local", + "organization": "20b3d507-96ac-454c-a795-bc731b46b12f", + "project": "20b3d507-96ac-454c-a795-bc731b46b12f", + "hostname": "routed-dualstack", + "image": { + "id": "3e0a5b84-1d69-4993-8fa4-0d7df52d5160", + "name": "Ubuntu 22.04 Jammy Jellyfish", + "organization": "51b656e3-4865-41e8-adbc-0c45bdd780db", + "project": "51b656e3-4865-41e8-adbc-0c45bdd780db", + "root_volume": { + "id": "13d945b9-5e78-4f9d-8ac4-c4bc2fa7c31a", + "name": "Ubuntu 22.04 
Jammy Jellyfish", + "volume_type": "unified", + "size": 10000000000 + }, + "extra_volumes": {}, + "public": true, + "arch": "x86_64", + "creation_date": "2024-02-22T15:52:56.037007+00:00", + "modification_date": "2024-02-22T15:52:56.037007+00:00", + "default_bootscript": null, + "from_server": null, + "state": "available", + "tags": [], + "zone": "nl-ams-1" + }, + "volumes": { + "0": { + "boot": false, + "id": "fe85c817-e67e-4e24-8f13-bde3e9f42620", + "name": "Ubuntu 22.04 Jammy Jellyfish", + "volume_type": "l_ssd", + "export_uri": null, + "organization": "20b3d507-96ac-454c-a795-bc731b46b12f", + "project": "20b3d507-96ac-454c-a795-bc731b46b12f", + "server": { + "id": "4904366a-7e26-4b65-b97b-6392c761247a", + "name": "routed-dualstack" + }, + "size": 20000000000, + "state": "available", + "creation_date": "2024-04-19T14:50:14.019739+00:00", + "modification_date": "2024-04-19T14:50:14.019739+00:00", + "tags": [], + "zone": "nl-ams-1" + } + }, + "tags": [], + "state": "running", + "protected": false, + "state_detail": "booted", + "public_ip": { + "id": "53f8f861-7a11-4b16-a4bc-fb8f4b4a11d0", + "address": "51.158.183.115", + "dynamic": false, + "gateway": "62.210.0.1", + "netmask": "32", + "family": "inet", + "provisioning_mode": "dhcp", + "tags": [], + "state": "attached", + "ipam_id": "ec3499ff-a664-49b7-818a-9fe4b95aee5e" + }, + "public_ips": [ + { + "id": "53f8f861-7a11-4b16-a4bc-fb8f4b4a11d0", + "address": "51.158.183.115", + "dynamic": false, + "gateway": "62.210.0.1", + "netmask": "32", + "family": "inet", + "provisioning_mode": "dhcp", + "tags": [], + "state": "attached", + "ipam_id": "ec3499ff-a664-49b7-818a-9fe4b95aee5e" + }, + { + "id": "f52a8c81-0875-4aee-b96e-eccfc6bec367", + "address": "2001:bc8:1640:1568:dc00:ff:fe21:91b", + "dynamic": false, + "gateway": "fe80::dc00:ff:fe21:91c", + "netmask": "64", + "family": "inet6", + "provisioning_mode": "slaac", + "tags": [], + "state": "attached", + "ipam_id": "40d1e6ea-e932-42f9-8acb-55398bec7ad6" + } + ], + "mac_address": "de:00:00:21:09:1b", + "routed_ip_enabled": true, + "ipv6": null, + "extra_networks": [], + "dynamic_ip_required": false, + "enable_ipv6": false, + "private_ip": null, + "creation_date": "2024-04-19T14:50:14.019739+00:00", + "modification_date": "2024-04-19T14:52:21.181670+00:00", + "bootscript": { + "id": "5a520dda-96d6-4ed2-acd1-1d526b6859fe", + "public": true, + "title": "x86_64 mainline 4.4.182 rev1", + "architecture": "x86_64", + "organization": "11111111-1111-4111-8111-111111111111", + "project": "11111111-1111-4111-8111-111111111111", + "kernel": "http://10.196.2.9/kernel/x86_64-mainline-lts-4.4-4.4.182-rev1/vmlinuz-4.4.182", + "dtb": "", + "initrd": "http://10.196.2.9/initrd/initrd-Linux-x86_64-v3.14.6.gz", + "bootcmdargs": "LINUX_COMMON scaleway boot=local nbd.max_part=16", + "default": true, + "zone": "nl-ams-1" + }, + "security_group": { + "id": "984414da-9fc2-49c0-a925-fed6266fe092", + "name": "Default security group" + }, + "location": { + "zone_id": "ams1", + "platform_id": "23", + "cluster_id": "19", + "hypervisor_id": "1201", + "node_id": "24" + }, + "maintenances": [], + "allowed_actions": [ + "poweroff", + "terminate", + "reboot", + "stop_in_place", + "backup" + ], + "placement_group": null, + "private_nics": [], + "zone": "nl-ams-1" } ] -} +} \ No newline at end of file diff --git a/discovery/uyuni/uyuni.go b/discovery/uyuni/uyuni.go index e885ef2e8..c8af2f158 100644 --- a/discovery/uyuni/uyuni.go +++ b/discovery/uyuni/uyuni.go @@ -20,6 +20,7 @@ import ( "net/http" "net/url" "path" + "strconv" 
"strings" "time" @@ -269,7 +270,7 @@ func (d *Discovery) getEndpointLabels( model.AddressLabel: model.LabelValue(addr), uyuniLabelMinionHostname: model.LabelValue(networkInfo.Hostname), uyuniLabelPrimaryFQDN: model.LabelValue(networkInfo.PrimaryFQDN), - uyuniLablelSystemID: model.LabelValue(fmt.Sprintf("%d", endpoint.SystemID)), + uyuniLablelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)), uyuniLablelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)), uyuniLablelEndpointName: model.LabelValue(endpoint.EndpointName), uyuniLablelExporter: model.LabelValue(endpoint.ExporterName), diff --git a/discovery/zookeeper/zookeeper.go b/discovery/zookeeper/zookeeper.go index 303c7ca6d..92904dd71 100644 --- a/discovery/zookeeper/zookeeper.go +++ b/discovery/zookeeper/zookeeper.go @@ -280,17 +280,17 @@ func parseServersetMember(data []byte, path string) (model.LabelSet, error) { labels := model.LabelSet{} labels[serversetPathLabel] = model.LabelValue(path) labels[model.AddressLabel] = model.LabelValue( - net.JoinHostPort(member.ServiceEndpoint.Host, fmt.Sprintf("%d", member.ServiceEndpoint.Port))) + net.JoinHostPort(member.ServiceEndpoint.Host, strconv.Itoa(member.ServiceEndpoint.Port))) labels[serversetEndpointLabelPrefix+"_host"] = model.LabelValue(member.ServiceEndpoint.Host) - labels[serversetEndpointLabelPrefix+"_port"] = model.LabelValue(fmt.Sprintf("%d", member.ServiceEndpoint.Port)) + labels[serversetEndpointLabelPrefix+"_port"] = model.LabelValue(strconv.Itoa(member.ServiceEndpoint.Port)) for name, endpoint := range member.AdditionalEndpoints { cleanName := model.LabelName(strutil.SanitizeLabelName(name)) labels[serversetEndpointLabelPrefix+"_host_"+cleanName] = model.LabelValue( endpoint.Host) labels[serversetEndpointLabelPrefix+"_port_"+cleanName] = model.LabelValue( - fmt.Sprintf("%d", endpoint.Port)) + strconv.Itoa(endpoint.Port)) } labels[serversetStatusLabel] = model.LabelValue(member.Status) @@ -321,10 +321,10 @@ func parseNerveMember(data []byte, path string) (model.LabelSet, error) { labels := model.LabelSet{} labels[nervePathLabel] = model.LabelValue(path) labels[model.AddressLabel] = model.LabelValue( - net.JoinHostPort(member.Host, fmt.Sprintf("%d", member.Port))) + net.JoinHostPort(member.Host, strconv.Itoa(member.Port))) labels[nerveEndpointLabelPrefix+"_host"] = model.LabelValue(member.Host) - labels[nerveEndpointLabelPrefix+"_port"] = model.LabelValue(fmt.Sprintf("%d", member.Port)) + labels[nerveEndpointLabelPrefix+"_port"] = model.LabelValue(strconv.Itoa(member.Port)) labels[nerveEndpointLabelPrefix+"_name"] = model.LabelValue(member.Name) return labels, nil diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index 93eaf251d..aa9bf3bfb 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -48,7 +48,7 @@ The Prometheus monitoring server | --rules.alert.for-outage-tolerance | Max time to tolerate prometheus outage for restoring "for" state of alert. Use with server mode only. | `1h` | | --rules.alert.for-grace-period | Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period. Use with server mode only. | `10m` | | --rules.alert.resend-delay | Minimum amount of time to wait before resending an alert to Alertmanager. Use with server mode only. | `1m` | -| --rules.max-concurrent-evals | Global concurrency limit for independent rules that can run concurrently. Use with server mode only. 
| `4` | +| --rules.max-concurrent-evals | Global concurrency limit for independent rules that can run concurrently. When set, "query.max-concurrency" may need to be adjusted accordingly. Use with server mode only. | `4` | | --alertmanager.notification-queue-capacity | The capacity of the queue for pending Alertmanager notifications. Use with server mode only. | `10000` | | --query.lookback-delta | The maximum lookback duration for retrieving metrics during expression evaluations and federation. Use with server mode only. | `5m` | | --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` | diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index 3eceed48f..443cd3f0c 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -566,6 +566,7 @@ Dump samples from a TSDB. | Flag | Description | Default | | --- | --- | --- | +| --sandbox-dir-root | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` | | --min-time | Minimum timestamp to dump. | `-9223372036854775808` | | --max-time | Maximum timestamp to dump. | `9223372036854775807` | | --match | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` | @@ -584,7 +585,7 @@ Dump samples from a TSDB. ##### `promtool tsdb dump-openmetrics` -[Experimental] Dump samples from a TSDB into OpenMetrics format. Native histograms are not dumped. +[Experimental] Dump samples from a TSDB into OpenMetrics text format, excluding native histograms and staleness markers, which are not representable in OpenMetrics. @@ -592,6 +593,7 @@ Dump samples from a TSDB. | Flag | Description | Default | | --- | --- | --- | +| --sandbox-dir-root | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` | | --min-time | Minimum timestamp to dump. | `-9223372036854775808` | | --max-time | Maximum timestamp to dump. | `9223372036854775807` | | --match | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` | diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 2f2e07a0c..a8fc9c626 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -70,6 +70,10 @@ global: # How frequently to evaluate rules. [ evaluation_interval: | default = 1m ] + + # Offset the rule evaluation timestamp of this particular group by the specified duration into the past to ensure the underlying metrics have been received. + # Metric availability delays are more likely to occur when Prometheus is running as a remote write target, but can also occur when there's anomalies with scraping. + [ rule_query_offset: | default = 0s ] # The labels to add to any time series or alerts when communicating with # external systems (federation, remote storage, Alertmanager). @@ -1349,7 +1353,7 @@ interface. The following meta labels are available on targets during [relabeling](#relabel_config): * `__meta_openstack_address_pool`: the pool of the private IP. -* `__meta_openstack_instance_flavor`: the flavor of the OpenStack instance. +* `__meta_openstack_instance_flavor`: the flavor ID of the OpenStack instance. * `__meta_openstack_instance_id`: the OpenStack instance ID. * `__meta_openstack_instance_image`: the ID of the image the OpenStack instance is using. * `__meta_openstack_instance_name`: the OpenStack instance name. 
@@ -1357,7 +1361,7 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_openstack_private_ip`: the private IP of the OpenStack instance. * `__meta_openstack_project_id`: the project (tenant) owning this instance. * `__meta_openstack_public_ip`: the public IP of the OpenStack instance. -* `__meta_openstack_tag_`: each tag value of the instance. +* `__meta_openstack_tag_`: each metadata item of the instance, with any unsupported characters converted to an underscore. * `__meta_openstack_user_id`: the user account owning the tenant. See below for the configuration options for OpenStack discovery: @@ -1467,6 +1471,7 @@ For OVHcloud's [public cloud instances](https://www.ovhcloud.com/en/public-cloud * `__meta_ovhcloud_dedicated_server_ipv6`: the IPv6 of the server * `__meta_ovhcloud_dedicated_server_link_speed`: the link speed of the server * `__meta_ovhcloud_dedicated_server_name`: the name of the server +* `__meta_ovhcloud_dedicated_server_no_intervention`: whether datacenter intervention is disabled for the server * `__meta_ovhcloud_dedicated_server_os`: the operating system of the server * `__meta_ovhcloud_dedicated_server_rack`: the rack of the server * `__meta_ovhcloud_dedicated_server_reverse`: the reverse DNS name of the server @@ -2952,9 +2957,10 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_scaleway_instance_type`: commercial type of the server * `__meta_scaleway_instance_zone`: the zone of the server (ex: `fr-par-1`, complete list [here](https://developers.scaleway.com/en/products/instance/api/#introduction)) -This role uses the private IPv4 address by default. This can be +This role uses the first address it finds in the following order: private IPv4, public IPv4, public IPv6. This can be changed with relabeling, as demonstrated in [the Prometheus scaleway-sd configuration file](/documentation/examples/prometheus-scaleway.yml). +Should an instance have no address before relabeling, it will not be added to the target list and you will not be able to relabel it. #### Baremetal role @@ -3672,7 +3678,8 @@ queue_config: [ min_shards: | default = 1 ] # Maximum number of samples per send. [ max_samples_per_send: | default = 2000] - # Maximum time a sample will wait in buffer. + # Maximum time a sample will wait for a send. The sample might wait less + # if the buffer is full. Further time might pass due to potential retries. [ batch_send_deadline: | default = 5s ] # Initial retry delay. Gets doubled for every retry. [ min_backoff: | default = 30ms ] diff --git a/docs/configuration/recording_rules.md b/docs/configuration/recording_rules.md index 48ab951f9..9aa226bbc 100644 --- a/docs/configuration/recording_rules.md +++ b/docs/configuration/recording_rules.md @@ -86,6 +86,9 @@ name: # rule can produce. 0 is no limit. [ limit: | default = 0 ] +# Offset the rule evaluation timestamp of this particular group by the specified duration into the past. +[ query_offset: | default = global.rule_query_offset ] + rules: [ - ... ] ``` @@ -148,6 +151,9 @@ the rule, active, pending, or inactive, are cleared as well. The event will be recorded as an error in the evaluation, and as such no stale markers are written. +# Rule query offset +This is useful to ensure the underlying metrics have been received and stored in Prometheus. 
Metric availability delays are more likely to occur when Prometheus is running as a remote write target due to the nature of distributed systems, but can also occur when there's anomalies with scraping and/or short evaluation intervals. + # Failed rule evaluations due to slow evaluation If a rule group hasn't finished evaluating before its next evaluation is supposed to start (as defined by the `evaluation_interval`), the next evaluation will be skipped. Subsequent evaluations of the rule group will continue to be skipped until the initial evaluation either completes or times out. When this happens, there will be a gap in the metric produced by the recording rule. The `rule_group_iterations_missed_total` metric will be incremented for each missed iteration of the rule group. diff --git a/docs/querying/remote_read_api.md b/docs/querying/remote_read_api.md index e3dd13306..efbd08e98 100644 --- a/docs/querying/remote_read_api.md +++ b/docs/querying/remote_read_api.md @@ -5,63 +5,7 @@ sort_rank: 7 # Remote Read API -This is not currently considered part of the stable API and is subject to change -even between non-major version releases of Prometheus. - -## Format overview - -The API response format is JSON. Every successful API request returns a `2xx` -status code. - -Invalid requests that reach the API handlers return a JSON error object -and one of the following HTTP response codes: - -- `400 Bad Request` when parameters are missing or incorrect. -- `422 Unprocessable Entity` when an expression can't be executed - ([RFC4918](https://tools.ietf.org/html/rfc4918#page-78)). -- `503 Service Unavailable` when queries time out or abort. - -Other non-`2xx` codes may be returned for errors occurring before the API -endpoint is reached. - -An array of warnings may be returned if there are errors that do -not inhibit the request execution. All of the data that was successfully -collected will be returned in the data field. - -The JSON response envelope format is as follows: - -``` -{ - "status": "success" | "error", - "data": , - - // Only set if status is "error". The data field may still hold - // additional data. - "errorType": "", - "error": "", - - // Only if there were warnings while executing the request. - // There will still be data in the data field. - "warnings": [""] -} -``` - -Generic placeholders are defined as follows: - -* ``: Input timestamps may be provided either in -[RFC3339](https://www.ietf.org/rfc/rfc3339.txt) format or as a Unix timestamp -in seconds, with optional decimal places for sub-second precision. Output -timestamps are always represented as Unix timestamps in seconds. -* ``: Prometheus [time series -selectors](basics.md#time-series-selectors) like `http_requests_total` or -`http_requests_total{method=~"(GET|POST)"}` and need to be URL-encoded. -* ``: [Prometheus duration strings](basics.md#time_durations). -For example, `5m` refers to a duration of 5 minutes. -* ``: boolean values (strings `true` and `false`). - -Note: Names of query parameters that may be repeated end with `[]`. - -## Remote Read API +> This is not currently considered part of the stable API and is subject to change even between non-major version releases of Prometheus. This API provides data read functionality from Prometheus. This interface expects [snappy](https://github.com/google/snappy) compression. The API definition is located [here](https://github.com/prometheus/prometheus/blob/master/prompb/remote.proto). @@ -79,5 +23,3 @@ This returns a message that includes a list of raw samples. 
These streamed chunks utilize an XOR algorithm inspired by the [Gorilla](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf) compression to encode the chunks. However, it provides resolution to the millisecond instead of to the second. - - diff --git a/docs/storage.md b/docs/storage.md index b4c5b6ada..b66f2062a 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -84,8 +84,10 @@ or 31 days, whichever is smaller. Prometheus has several flags that configure local storage. The most important are: - `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`. -- `--storage.tsdb.retention.time`: When to remove old data. Defaults to `15d`. - Overrides `storage.tsdb.retention` if this flag is set to anything other than default. +- `--storage.tsdb.retention.time`: How long to retain samples in storage. When this flag is + set, it overrides `storage.tsdb.retention`. If neither this flag nor `storage.tsdb.retention` + nor `storage.tsdb.retention.size` is set, the retention time defaults to `15d`. + Supported units: y, w, d, h, m, s, ms. - `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain. The oldest data will be removed first. Defaults to `0` or disabled. Units supported: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Only @@ -195,6 +197,9 @@ or time-series database to Prometheus. To do so, the user must first convert the source data into [OpenMetrics](https://openmetrics.io/) format, which is the input format for the backfilling as described below. +Note that native histograms and staleness markers are not supported by this +procedure, as they cannot be represented in the OpenMetrics format. + ### Usage Backfilling can be used via the Promtool command line. Promtool will write the blocks diff --git a/documentation/examples/custom-sd/adapter-usage/main.go b/documentation/examples/custom-sd/adapter-usage/main.go index bfbca7b70..8ccbafe6f 100644 --- a/documentation/examples/custom-sd/adapter-usage/main.go +++ b/documentation/examples/custom-sd/adapter-usage/main.go @@ -127,9 +127,9 @@ func (d *discovery) parseServiceNodes(resp *http.Response, name string) (*target // since the service may be registered remotely through a different node. 
var addr string if node.ServiceAddress != "" { - addr = net.JoinHostPort(node.ServiceAddress, fmt.Sprintf("%d", node.ServicePort)) + addr = net.JoinHostPort(node.ServiceAddress, strconv.Itoa(node.ServicePort)) } else { - addr = net.JoinHostPort(node.Address, fmt.Sprintf("%d", node.ServicePort)) + addr = net.JoinHostPort(node.Address, strconv.Itoa(node.ServicePort)) } target := model.LabelSet{model.AddressLabel: model.LabelValue(addr)} diff --git a/documentation/examples/remote_storage/go.mod b/documentation/examples/remote_storage/go.mod index dff988131..1ab2cec13 100644 --- a/documentation/examples/remote_storage/go.mod +++ b/documentation/examples/remote_storage/go.mod @@ -10,7 +10,7 @@ require ( github.com/influxdata/influxdb v1.11.5 github.com/prometheus/client_golang v1.19.0 github.com/prometheus/common v0.53.0 - github.com/prometheus/prometheus v0.51.1 + github.com/prometheus/prometheus v0.51.2 github.com/stretchr/testify v1.9.0 ) diff --git a/documentation/examples/remote_storage/go.sum b/documentation/examples/remote_storage/go.sum index b145f362f..9506ae638 100644 --- a/documentation/examples/remote_storage/go.sum +++ b/documentation/examples/remote_storage/go.sum @@ -279,8 +279,8 @@ github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4O github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= -github.com/prometheus/prometheus v0.51.1 h1:V2e7x2oiUC0Megp26+xjffxBf9EGkyP1iQuGd4VjUSU= -github.com/prometheus/prometheus v0.51.1/go.mod h1:yv4MwOn3yHMQ6MZGHPg/U7Fcyqf+rxqiZfSur6myVtc= +github.com/prometheus/prometheus v0.51.2 h1:U0faf1nT4CB9DkBW87XLJCBi2s8nwWXdTbyzRUAkX0w= +github.com/prometheus/prometheus v0.51.2/go.mod h1:yv4MwOn3yHMQ6MZGHPg/U7Fcyqf+rxqiZfSur6myVtc= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/scaleway/scaleway-sdk-go v1.0.0-beta.25 h1:/8rfZAdFfafRXOgz+ZpMZZWZ5pYggCY9t7e/BvjaBHM= diff --git a/documentation/prometheus-mixin/config.libsonnet b/documentation/prometheus-mixin/config.libsonnet index ab9079a5e..70d46a221 100644 --- a/documentation/prometheus-mixin/config.libsonnet +++ b/documentation/prometheus-mixin/config.libsonnet @@ -44,5 +44,10 @@ // The default refresh time for all dashboards, default to 60s refresh: '60s', }, + + // Opt-out of multi-cluster dashboards by overriding this. + showMultiCluster: true, + // The cluster label to infer the cluster name from. 
+ clusterLabel: 'cluster', }, } diff --git a/documentation/prometheus-mixin/dashboards.libsonnet b/documentation/prometheus-mixin/dashboards.libsonnet index efe53dbac..2bdd168cc 100644 --- a/documentation/prometheus-mixin/dashboards.libsonnet +++ b/documentation/prometheus-mixin/dashboards.libsonnet @@ -10,21 +10,32 @@ local template = grafana.template; { grafanaDashboards+:: { 'prometheus.json': - g.dashboard( + local showMultiCluster = $._config.showMultiCluster; + local dashboard = g.dashboard( '%(prefix)sOverview' % $._config.grafanaPrometheus - ) - .addMultiTemplate('cluster', 'prometheus_build_info{%(prometheusSelector)s}' % $._config, 'cluster') - .addMultiTemplate('job', 'prometheus_build_info{cluster=~"$cluster"}', 'job') - .addMultiTemplate('instance', 'prometheus_build_info{cluster=~"$cluster", job=~"$job"}', 'instance') + ); + local templatedDashboard = if showMultiCluster then + dashboard + .addMultiTemplate('cluster', 'prometheus_build_info{%(prometheusSelector)s}' % $._config, $._config.clusterLabel) + .addMultiTemplate('job', 'prometheus_build_info{cluster=~"$cluster"}', 'job') + .addMultiTemplate('instance', 'prometheus_build_info{cluster=~"$cluster", job=~"$job"}', 'instance') + else + dashboard + .addMultiTemplate('job', 'prometheus_build_info{%(prometheusSelector)s}' % $._config, 'job') + .addMultiTemplate('instance', 'prometheus_build_info{job=~"$job"}', 'instance'); + templatedDashboard .addRow( g.row('Prometheus Stats') .addPanel( g.panel('Prometheus Stats') + - g.tablePanel([ + g.tablePanel(if showMultiCluster then [ 'count by (cluster, job, instance, version) (prometheus_build_info{cluster=~"$cluster", job=~"$job", instance=~"$instance"})', 'max by (cluster, job, instance) (time() - process_start_time_seconds{cluster=~"$cluster", job=~"$job", instance=~"$instance"})', + ] else [ + 'count by (job, instance, version) (prometheus_build_info{job=~"$job", instance=~"$instance"})', + 'max by (job, instance) (time() - process_start_time_seconds{job=~"$job", instance=~"$instance"})', ], { - cluster: { alias: 'Cluster' }, + cluster: { alias: if showMultiCluster then 'Cluster' else '' }, job: { alias: 'Job' }, instance: { alias: 'Instance' }, version: { alias: 'Version' }, @@ -37,12 +48,18 @@ local template = grafana.template; g.row('Discovery') .addPanel( g.panel('Target Sync') + - g.queryPanel('sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[5m])) by (cluster, job, scrape_job, instance) * 1e3', '{{cluster}}:{{job}}:{{instance}}:{{scrape_job}}') + + g.queryPanel(if showMultiCluster then 'sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[5m])) by (cluster, job, scrape_job, instance) * 1e3' + else 'sum(rate(prometheus_target_sync_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m])) by (scrape_job) * 1e3', + if showMultiCluster then '{{cluster}}:{{job}}:{{instance}}:{{scrape_job}}' + else '{{scrape_job}}') + { yaxes: g.yaxes('ms') } ) .addPanel( g.panel('Targets') + - g.queryPanel('sum by (cluster, job, instance) (prometheus_sd_discovered_targets{cluster=~"$cluster", job=~"$job",instance=~"$instance"})', '{{cluster}}:{{job}}:{{instance}}') + + g.queryPanel(if showMultiCluster then 'sum by (cluster, job, instance) (prometheus_sd_discovered_targets{cluster=~"$cluster", job=~"$job",instance=~"$instance"})' + else 'sum(prometheus_sd_discovered_targets{job=~"$job",instance=~"$instance"})', + if showMultiCluster then '{{cluster}}:{{job}}:{{instance}}' + else 
'Targets') + g.stack ) ) @@ -50,29 +67,47 @@ local template = grafana.template; g.row('Retrieval') .addPanel( g.panel('Average Scrape Interval Duration') + - g.queryPanel('rate(prometheus_target_interval_length_seconds_sum{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) * 1e3', '{{cluster}}:{{job}}:{{instance}} {{interval}} configured') + + g.queryPanel(if showMultiCluster then 'rate(prometheus_target_interval_length_seconds_sum{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) * 1e3' + else 'rate(prometheus_target_interval_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~"$job",instance=~"$instance"}[5m]) * 1e3', + if showMultiCluster then '{{cluster}}:{{job}}:{{instance}} {{interval}} configured' + else '{{interval}} configured') + { yaxes: g.yaxes('ms') } ) .addPanel( g.panel('Scrape failures') + - g.queryPanel([ + g.queryPanel(if showMultiCluster then [ 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', - ], [ + ] else [ + 'sum by (job) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total[1m]))', + 'sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))', + 'sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))', + 'sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))', + 'sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))', + ], if showMultiCluster then [ 'exceeded body size limit: {{cluster}} {{job}} {{instance}}', 'exceeded sample limit: {{cluster}} {{job}} {{instance}}', 'duplicate timestamp: {{cluster}} {{job}} {{instance}}', 'out of bounds: {{cluster}} {{job}} {{instance}}', 'out of order: {{cluster}} {{job}} {{instance}}', + ] else [ + 'exceeded body size limit: {{job}}', + 'exceeded sample limit: {{job}}', + 'duplicate timestamp: {{job}}', + 'out of bounds: {{job}}', + 'out of order: {{job}}', ]) + g.stack ) .addPanel( g.panel('Appended Samples') + - g.queryPanel('rate(prometheus_tsdb_head_samples_appended_total{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m])', '{{cluster}} {{job}} {{instance}}') + + g.queryPanel(if showMultiCluster then 'rate(prometheus_tsdb_head_samples_appended_total{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m])' + else 'rate(prometheus_tsdb_head_samples_appended_total{job=~"$job",instance=~"$instance"}[5m])', + if showMultiCluster then '{{cluster}} {{job}} {{instance}}' + else '{{job}} {{instance}}') + g.stack ) ) @@ -80,12 +115,18 @@ local template = grafana.template; 
g.row('Storage') .addPanel( g.panel('Head Series') + - g.queryPanel('prometheus_tsdb_head_series{cluster=~"$cluster",job=~"$job",instance=~"$instance"}', '{{cluster}} {{job}} {{instance}} head series') + + g.queryPanel(if showMultiCluster then 'prometheus_tsdb_head_series{cluster=~"$cluster",job=~"$job",instance=~"$instance"}' + else 'prometheus_tsdb_head_series{job=~"$job",instance=~"$instance"}', + if showMultiCluster then '{{cluster}} {{job}} {{instance}} head series' + else '{{job}} {{instance}} head series') + g.stack ) .addPanel( g.panel('Head Chunks') + - g.queryPanel('prometheus_tsdb_head_chunks{cluster=~"$cluster",job=~"$job",instance=~"$instance"}', '{{cluster}} {{job}} {{instance}} head chunks') + + g.queryPanel(if showMultiCluster then 'prometheus_tsdb_head_chunks{cluster=~"$cluster",job=~"$job",instance=~"$instance"}' + else 'prometheus_tsdb_head_chunks{job=~"$job",instance=~"$instance"}', + if showMultiCluster then '{{cluster}} {{job}} {{instance}} head chunks' + else '{{job}} {{instance}} head chunks') + g.stack ) ) @@ -93,12 +134,18 @@ local template = grafana.template; g.row('Query') .addPanel( g.panel('Query Rate') + - g.queryPanel('rate(prometheus_engine_query_duration_seconds_count{cluster=~"$cluster",job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])', '{{cluster}} {{job}} {{instance}}') + + g.queryPanel(if showMultiCluster then 'rate(prometheus_engine_query_duration_seconds_count{cluster=~"$cluster",job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])' + else 'rate(prometheus_engine_query_duration_seconds_count{job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])', + if showMultiCluster then '{{cluster}} {{job}} {{instance}}' + else '{{job}} {{instance}}') + g.stack, ) .addPanel( g.panel('Stage Duration') + - g.queryPanel('max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",cluster=~"$cluster", job=~"$job",instance=~"$instance"}) * 1e3', '{{slice}}') + + g.queryPanel(if showMultiCluster then 'max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",cluster=~"$cluster", job=~"$job",instance=~"$instance"}) * 1e3' + else 'max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",job=~"$job",instance=~"$instance"}) * 1e3', + if showMultiCluster then '{{slice}}' + else '{{slice}}') + { yaxes: g.yaxes('ms') } + g.stack, ) diff --git a/go.mod b/go.mod index 165540e7f..7b94f792e 100644 --- a/go.mod +++ b/go.mod @@ -41,7 +41,7 @@ require ( github.com/json-iterator/go v1.1.12 github.com/klauspost/compress v1.17.8 github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b - github.com/linode/linodego v1.32.0 + github.com/linode/linodego v1.33.0 github.com/miekg/dns v1.1.59 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f @@ -60,7 +60,6 @@ require ( github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c github.com/stretchr/testify v1.9.0 github.com/vultr/govultr/v2 v2.17.2 - go.opentelemetry.io/collector/featuregate v1.5.0 go.opentelemetry.io/collector/pdata v1.5.0 go.opentelemetry.io/collector/semconv v0.98.0 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.50.0 @@ -80,10 +79,10 @@ require ( golang.org/x/sys v0.19.0 golang.org/x/time v0.5.0 golang.org/x/tools v0.20.0 - google.golang.org/api v0.174.0 + google.golang.org/api v0.177.0 google.golang.org/genproto/googleapis/api v0.0.0-20240415180920-8c6c420018be google.golang.org/grpc v1.63.2 - google.golang.org/protobuf v1.33.0 + google.golang.org/protobuf 
v1.34.0 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.29.3 @@ -94,8 +93,8 @@ require ( ) require ( - cloud.google.com/go/auth v0.2.0 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.0 // indirect + cloud.google.com/go/auth v0.3.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect cloud.google.com/go/compute/metadata v0.3.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.2 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect @@ -151,7 +150,6 @@ require ( github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/go-retryablehttp v0.7.4 // indirect github.com/hashicorp/go-rootcerts v1.0.2 // indirect - github.com/hashicorp/go-version v1.6.0 // indirect github.com/hashicorp/golang-lru v0.6.0 // indirect github.com/hashicorp/serf v0.10.1 // indirect github.com/imdario/mergo v0.3.16 // indirect @@ -191,7 +189,7 @@ require ( golang.org/x/mod v0.17.0 // indirect golang.org/x/term v0.19.0 // indirect golang.org/x/text v0.14.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240415180920-8c6c420018be // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240429193739-8cf5692501f6 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gotest.tools/v3 v3.0.3 // indirect diff --git a/go.sum b/go.sum index 4b8602b05..d030a96c4 100644 --- a/go.sum +++ b/go.sum @@ -12,10 +12,10 @@ cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bP cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= -cloud.google.com/go/auth v0.2.0 h1:y6oTcpMSbOcXbwYgUUrvI+mrQ2xbrcdpPgtVbCGTLTk= -cloud.google.com/go/auth v0.2.0/go.mod h1:+yb+oy3/P0geX6DLKlqiGHARGR6EX2GRtYCzWOCQSbU= -cloud.google.com/go/auth/oauth2adapt v0.2.0 h1:FR8zevgQwu+8CqiOT5r6xCmJa3pJC/wdXEEPF1OkNhA= -cloud.google.com/go/auth/oauth2adapt v0.2.0/go.mod h1:AfqujpDAlTfLfeCIl/HJZZlIxD8+nJoZ5e0x1IxGq5k= +cloud.google.com/go/auth v0.3.0 h1:PRyzEpGfx/Z9e8+lHsbkoUVXD0gnu4MNmm7Gp8TQNIs= +cloud.google.com/go/auth v0.3.0/go.mod h1:lBv6NKTWp8E3LPzmO1TbiiRKc4drLOfHsgmlH9ogv5w= +cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKFt//oWu7HX4= +cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= @@ -471,8 +471,8 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= -github.com/linode/linodego v1.32.0 h1:OmZzB3iON6uu84VtLFf64uKmAQqJJarvmsVguroioPI= -github.com/linode/linodego v1.32.0/go.mod h1:y8GDP9uLVH4jTB9qyrgw79qfKdYJmNCGUOJmfuiOcmI= +github.com/linode/linodego v1.33.0 h1:cX2FYry7r6CA1ujBMsdqiM4VhvIQtnWsOuVblzfBhCw= +github.com/linode/linodego v1.33.0/go.mod 
h1:dSJJgIwqZCF5wnpuC6w5cyIbRtcexAm7uVvuJopGB40= github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= @@ -722,8 +722,6 @@ go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/collector/featuregate v1.5.0 h1:uK8qnYQKz1TMkK+FDTFsywg/EybW/gbnOUaPNUkRznM= -go.opentelemetry.io/collector/featuregate v1.5.0/go.mod h1:w7nUODKxEi3FLf1HslCiE6YWtMtOOrMnSwsDam8Mg9w= go.opentelemetry.io/collector/pdata v1.5.0 h1:1fKTmUpr0xCOhP/B0VEvtz7bYPQ45luQ8XFyA07j8LE= go.opentelemetry.io/collector/pdata v1.5.0/go.mod h1:TYj8aKRWZyT/KuKQXKyqSEvK/GV+slFaDMEI+Ke64Yw= go.opentelemetry.io/collector/semconv v0.98.0 h1:zO4L4TmlxXoYu8UgPeYElGY19BW7wPjM+quL5CzoOoY= @@ -1045,8 +1043,8 @@ google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/ google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.174.0 h1:zB1BWl7ocxfTea2aQ9mgdzXjnfPySllpPOskdnO+q34= -google.golang.org/api v0.174.0/go.mod h1:aC7tB6j0HR1Nl0ni5ghpx6iLasmAX78Zkh/wgxAAjLg= +google.golang.org/api v0.177.0 h1:8a0p/BbPa65GlqGWtUKxot4p0TV8OGOfyTjtmkXNXmk= +google.golang.org/api v0.177.0/go.mod h1:srbhue4MLjkjbkux5p3dw/ocYOSZTaIEvf7bCOnFQDw= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1085,8 +1083,8 @@ google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7Fc google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto/googleapis/api v0.0.0-20240415180920-8c6c420018be h1:Zz7rLWqp0ApfsR/l7+zSHhY3PMiH2xqgxlfYfAfNpoU= google.golang.org/genproto/googleapis/api v0.0.0-20240415180920-8c6c420018be/go.mod h1:dvdCTIoAGbkWbcIKBniID56/7XHTt6WfxXNMxuziJ+w= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240415180920-8c6c420018be h1:LG9vZxsWGOmUKieR8wPAUR3u3MpnYFQZROPIMaXh7/A= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240415180920-8c6c420018be/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240429193739-8cf5692501f6 h1:DujSIu+2tC9Ht0aPNA7jgj23Iq8Ewi5sgkQ++wdvonE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240429193739-8cf5692501f6/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= @@ -1118,8 +1116,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= 
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.0 h1:Qo/qEd2RZPCf2nKuorzksSknv0d3ERwp1vFG38gSmH4= +google.golang.org/protobuf v1.34.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index 49fb77ab0..759da6540 100644 --- a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -14,9 +14,9 @@ package histogram import ( - "fmt" "math" "math/rand" + "strconv" "testing" "github.com/stretchr/testify/require" @@ -2134,7 +2134,7 @@ func TestAllFloatBucketIterator(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { var expBuckets, actBuckets []Bucket[float64] if c.includeNeg { @@ -2360,7 +2360,7 @@ func TestAllReverseFloatBucketIterator(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { var expBuckets, actBuckets []Bucket[float64] if c.includePos { diff --git a/model/histogram/histogram_test.go b/model/histogram/histogram_test.go index 14a948e64..d1a074135 100644 --- a/model/histogram/histogram_test.go +++ b/model/histogram/histogram_test.go @@ -14,8 +14,8 @@ package histogram import ( - "fmt" "math" + "strconv" "testing" "github.com/stretchr/testify/require" @@ -72,7 +72,7 @@ func TestHistogramString(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { actualString := c.histogram.String() require.Equal(t, c.expectedString, actualString) }) @@ -211,7 +211,7 @@ func TestCumulativeBucketIterator(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { it := c.histogram.CumulativeBucketIterator() actualBuckets := make([]Bucket[uint64], 0, len(c.expectedBuckets)) for it.Next() { @@ -371,7 +371,7 @@ func TestRegularBucketIterator(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { it := c.histogram.PositiveBucketIterator() actualPositiveBuckets := make([]Bucket[uint64], 0, len(c.expectedPositiveBuckets)) for it.Next() { diff --git a/model/labels/labels_test.go b/model/labels/labels_test.go index 3d6e7659f..6464d007d 100644 --- a/model/labels/labels_test.go +++ b/model/labels/labels_test.go @@ -17,6 +17,7 @@ import ( "encoding/json" "fmt" "net/http" + "strconv" "strings" "testing" @@ -732,7 +733,7 @@ func TestScratchBuilder(t *testing.T) { want: FromStrings("ddd", "444"), }, } { - t.Run(fmt.Sprint(i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { b := NewScratchBuilder(len(tcase.add)) for _, lbl := range tcase.add { b.Add(lbl.Name, lbl.Value) diff --git 
a/model/labels/matcher.go b/model/labels/matcher.go index 1282f80d6..8e220e392 100644 --- a/model/labels/matcher.go +++ b/model/labels/matcher.go @@ -14,7 +14,8 @@ package labels import ( - "fmt" + "bytes" + "strconv" ) // MatchType is an enum for label matching types. @@ -78,7 +79,29 @@ func MustNewMatcher(mt MatchType, name, val string) *Matcher { } func (m *Matcher) String() string { - return fmt.Sprintf("%s%s%q", m.Name, m.Type, m.Value) + // Start a buffer with a pre-allocated size on stack to cover most needs. + var bytea [1024]byte + b := bytes.NewBuffer(bytea[:0]) + + if m.shouldQuoteName() { + b.Write(strconv.AppendQuote(b.AvailableBuffer(), m.Name)) + } else { + b.WriteString(m.Name) + } + b.WriteString(m.Type.String()) + b.Write(strconv.AppendQuote(b.AvailableBuffer(), m.Value)) + + return b.String() +} + +func (m *Matcher) shouldQuoteName() bool { + for i, c := range m.Name { + if c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (i > 0 && c >= '0' && c <= '9') { + continue + } + return true + } + return false } // Matches returns whether the matcher matches the given string value. diff --git a/model/labels/matcher_test.go b/model/labels/matcher_test.go index c23deafe6..ff39d40d0 100644 --- a/model/labels/matcher_test.go +++ b/model/labels/matcher_test.go @@ -15,6 +15,7 @@ package labels import ( "fmt" + "math/rand" "testing" "github.com/stretchr/testify/require" @@ -225,3 +226,128 @@ func BenchmarkNewMatcher(b *testing.B) { } }) } + +func BenchmarkMatcher_String(b *testing.B) { + type benchCase struct { + name string + matchers []*Matcher + } + cases := []benchCase{ + { + name: "short name equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, "foo", "bar"), + MustNewMatcher(MatchEqual, "bar", "baz"), + MustNewMatcher(MatchEqual, "abc", "def"), + MustNewMatcher(MatchEqual, "ghi", "klm"), + MustNewMatcher(MatchEqual, "nop", "qrs"), + }, + }, + { + name: "short quoted name not equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, "f.o", "bar"), + MustNewMatcher(MatchEqual, "b.r", "baz"), + MustNewMatcher(MatchEqual, "a.c", "def"), + MustNewMatcher(MatchEqual, "g.i", "klm"), + MustNewMatcher(MatchEqual, "n.p", "qrs"), + }, + }, + { + name: "short quoted name with quotes not equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, `"foo"`, "bar"), + MustNewMatcher(MatchEqual, `"foo"`, "baz"), + MustNewMatcher(MatchEqual, `"foo"`, "def"), + MustNewMatcher(MatchEqual, `"foo"`, "klm"), + MustNewMatcher(MatchEqual, `"foo"`, "qrs"), + }, + }, + { + name: "short name value with quotes equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, "foo", `"bar"`), + MustNewMatcher(MatchEqual, "bar", `"baz"`), + MustNewMatcher(MatchEqual, "abc", `"def"`), + MustNewMatcher(MatchEqual, "ghi", `"klm"`), + MustNewMatcher(MatchEqual, "nop", `"qrs"`), + }, + }, + { + name: "short name and long value regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "foo", "five_six_seven_eight_nine_ten_one_two_three_four"), + MustNewMatcher(MatchRegexp, "bar", "one_two_three_four_five_six_seven_eight_nine_ten"), + MustNewMatcher(MatchRegexp, "abc", "two_three_four_five_six_seven_eight_nine_ten_one"), + MustNewMatcher(MatchRegexp, "ghi", "three_four_five_six_seven_eight_nine_ten_one_two"), + MustNewMatcher(MatchRegexp, "nop", "four_five_six_seven_eight_nine_ten_one_two_three"), + }, + }, + { + name: "short name and long value with quotes equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, "foo", `five_six_seven_eight_nine_ten_"one"_two_three_four`), + 
MustNewMatcher(MatchEqual, "bar", `one_two_three_four_five_six_"seven"_eight_nine_ten`), + MustNewMatcher(MatchEqual, "abc", `two_three_four_five_six_seven_"eight"_nine_ten_one`), + MustNewMatcher(MatchEqual, "ghi", `three_four_five_six_seven_eight_"nine"_ten_one_two`), + MustNewMatcher(MatchEqual, "nop", `four_five_six_seven_eight_nine_"ten"_one_two_three`), + }, + }, + { + name: "long name regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "one_two_three_four_five_six_seven_eight_nine_ten", "val"), + MustNewMatcher(MatchRegexp, "two_three_four_five_six_seven_eight_nine_ten_one", "val"), + MustNewMatcher(MatchRegexp, "three_four_five_six_seven_eight_nine_ten_one_two", "val"), + MustNewMatcher(MatchRegexp, "four_five_six_seven_eight_nine_ten_one_two_three", "val"), + MustNewMatcher(MatchRegexp, "five_six_seven_eight_nine_ten_one_two_three_four", "val"), + }, + }, + { + name: "long quoted name regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "one.two.three.four.five.six.seven.eight.nine.ten", "val"), + MustNewMatcher(MatchRegexp, "two.three.four.five.six.seven.eight.nine.ten.one", "val"), + MustNewMatcher(MatchRegexp, "three.four.five.six.seven.eight.nine.ten.one.two", "val"), + MustNewMatcher(MatchRegexp, "four.five.six.seven.eight.nine.ten.one.two.three", "val"), + MustNewMatcher(MatchRegexp, "five.six.seven.eight.nine.ten.one.two.three.four", "val"), + }, + }, + { + name: "long name and long value regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "one_two_three_four_five_six_seven_eight_nine_ten", "five_six_seven_eight_nine_ten_one_two_three_four"), + MustNewMatcher(MatchRegexp, "two_three_four_five_six_seven_eight_nine_ten_one", "one_two_three_four_five_six_seven_eight_nine_ten"), + MustNewMatcher(MatchRegexp, "three_four_five_six_seven_eight_nine_ten_one_two", "two_three_four_five_six_seven_eight_nine_ten_one"), + MustNewMatcher(MatchRegexp, "four_five_six_seven_eight_nine_ten_one_two_three", "three_four_five_six_seven_eight_nine_ten_one_two"), + MustNewMatcher(MatchRegexp, "five_six_seven_eight_nine_ten_one_two_three_four", "four_five_six_seven_eight_nine_ten_one_two_three"), + }, + }, + { + name: "long quoted name and long value regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "one.two.three.four.five.six.seven.eight.nine.ten", "five.six.seven.eight.nine.ten.one.two.three.four"), + MustNewMatcher(MatchRegexp, "two.three.four.five.six.seven.eight.nine.ten.one", "one.two.three.four.five.six.seven.eight.nine.ten"), + MustNewMatcher(MatchRegexp, "three.four.five.six.seven.eight.nine.ten.one.two", "two.three.four.five.six.seven.eight.nine.ten.one"), + MustNewMatcher(MatchRegexp, "four.five.six.seven.eight.nine.ten.one.two.three", "three.four.five.six.seven.eight.nine.ten.one.two"), + MustNewMatcher(MatchRegexp, "five.six.seven.eight.nine.ten.one.two.three.four", "four.five.six.seven.eight.nine.ten.one.two.three"), + }, + }, + } + + var mixed []*Matcher + for _, bc := range cases { + mixed = append(mixed, bc.matchers...) 
+ } + rand.Shuffle(len(mixed), func(i, j int) { mixed[i], mixed[j] = mixed[j], mixed[i] }) + cases = append(cases, benchCase{name: "mixed", matchers: mixed}) + + for _, bc := range cases { + b.Run(bc.name, func(b *testing.B) { + for i := 0; i <= b.N; i++ { + m := bc.matchers[i%len(bc.matchers)] + _ = m.String() + } + }) + } +} diff --git a/model/labels/regexp.go b/model/labels/regexp.go index f35dc76f6..b484e2716 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -16,6 +16,7 @@ package labels import ( "slices" "strings" + "unicode/utf8" "github.com/grafana/regexp" "github.com/grafana/regexp/syntax" @@ -827,8 +828,12 @@ type zeroOrOneCharacterStringMatcher struct { } func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool { - // Zero or one. - if len(s) > 1 { + // If there's more than one rune in the string, then it can't match. + if r, size := utf8.DecodeRuneInString(s); r == utf8.RuneError { + // Size is 0 for empty strings, 1 for invalid rune. + // Empty string matches, invalid rune matches if there isn't anything else. + return size == len(s) + } else if size < len(s) { return false } diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index 3a15b52b4..1db90a473 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -19,6 +19,7 @@ import ( "strings" "testing" "time" + "unicode/utf8" "github.com/grafana/regexp" "github.com/grafana/regexp/syntax" @@ -36,6 +37,7 @@ var ( ".*foo", "^.*foo$", "^.+foo$", + ".?", ".*", ".+", "foo.+", @@ -84,10 +86,16 @@ var ( "foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "", "FOO", "Foo", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo", "10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40", - "foofoo0", "foofoo", + "foofoo0", "foofoo", "😀foo0", // Values matching / not matching the test regexps on long alternations. 
"zQPbMkNO", "zQPbMkNo", "jyyfj00j0061", "jyyfj00j006", "jyyfj00j00612", "NNSPdvMi", "NNSPdvMiXXX", "NNSPdvMixxx", "nnSPdvMi", "nnSPdvMiXXX", + + // Invalid utf8 + "\xfefoo", + "foo\xfe", + "\xfd", + "\xff\xff", } ) @@ -926,19 +934,91 @@ func BenchmarkOptimizeEqualStringMatchers(b *testing.B) { } func TestZeroOrOneCharacterStringMatcher(t *testing.T) { - matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} - require.True(t, matcher.Matches("")) - require.True(t, matcher.Matches("x")) - require.True(t, matcher.Matches("\n")) - require.False(t, matcher.Matches("xx")) - require.False(t, matcher.Matches("\n\n")) + t.Run("match newline", func(t *testing.T) { + matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} + require.True(t, matcher.Matches("")) + require.True(t, matcher.Matches("x")) + require.True(t, matcher.Matches("\n")) + require.False(t, matcher.Matches("xx")) + require.False(t, matcher.Matches("\n\n")) + }) - matcher = &zeroOrOneCharacterStringMatcher{matchNL: false} - require.True(t, matcher.Matches("")) - require.True(t, matcher.Matches("x")) - require.False(t, matcher.Matches("\n")) - require.False(t, matcher.Matches("xx")) - require.False(t, matcher.Matches("\n\n")) + t.Run("do not match newline", func(t *testing.T) { + matcher := &zeroOrOneCharacterStringMatcher{matchNL: false} + require.True(t, matcher.Matches("")) + require.True(t, matcher.Matches("x")) + require.False(t, matcher.Matches("\n")) + require.False(t, matcher.Matches("xx")) + require.False(t, matcher.Matches("\n\n")) + }) + + t.Run("unicode", func(t *testing.T) { + // Just for documentation purposes, emoji1 is 1 rune, emoji2 is 2 runes. + // Having this in mind, will make future readers fixing tests easier. + emoji1 := "😀" + emoji2 := "❤️" + require.Equal(t, 1, utf8.RuneCountInString(emoji1)) + require.Equal(t, 2, utf8.RuneCountInString(emoji2)) + + matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} + require.True(t, matcher.Matches(emoji1)) + require.False(t, matcher.Matches(emoji2)) + require.False(t, matcher.Matches(emoji1+emoji1)) + require.False(t, matcher.Matches("x"+emoji1)) + require.False(t, matcher.Matches(emoji1+"x")) + require.False(t, matcher.Matches(emoji1+emoji2)) + }) + + t.Run("invalid unicode", func(t *testing.T) { + // Just for reference, we also compare to what `^.?$` regular expression matches. 
+ re := regexp.MustCompile("^.?$") + matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} + + requireMatches := func(s string, expected bool) { + t.Helper() + require.Equal(t, expected, matcher.Matches(s)) + require.Equal(t, re.MatchString(s), matcher.Matches(s)) + } + + requireMatches("\xff", true) + requireMatches("x\xff", false) + requireMatches("\xffx", false) + requireMatches("\xff\xfe", false) + }) +} + +func BenchmarkZeroOrOneCharacterStringMatcher(b *testing.B) { + type benchCase struct { + str string + matches bool + } + + emoji1 := "😀" + emoji2 := "❤️" + cases := []benchCase{ + {"", true}, + {"x", true}, + {"\n", true}, + {"xx", false}, + {"\n\n", false}, + {emoji1, true}, + {emoji2, false}, + {emoji1 + emoji1, false}, + {strings.Repeat("x", 100), false}, + {strings.Repeat(emoji1, 100), false}, + {strings.Repeat(emoji2, 100), false}, + } + + matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} + b.ResetTimer() + + for n := 0; n < b.N; n++ { + c := cases[n%len(cases)] + got := matcher.Matches(c.str) + if got != c.matches { + b.Fatalf("unexpected result for %q: got %t, want %t", c.str, got, c.matches) + } + } } func TestLiteralPrefixStringMatcher(t *testing.T) { diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index d29c3d07a..db2e6ce2f 100644 --- a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -17,6 +17,7 @@ import ( "crypto/md5" "encoding/binary" "fmt" + "strconv" "strings" "github.com/grafana/regexp" @@ -48,7 +49,7 @@ const ( Drop Action = "drop" // KeepEqual drops targets for which the input does not match the target. KeepEqual Action = "keepequal" - // Drop drops targets for which the input does match the target. + // DropEqual drops targets for which the input does match the target. DropEqual Action = "dropequal" // HashMod sets a label to the modulus of a hash of labels. HashMod Action = "hashmod" @@ -290,7 +291,7 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { hash := md5.Sum([]byte(val)) // Use only the last 8 bytes of the hash to give the same result as earlier versions of this code. mod := binary.BigEndian.Uint64(hash[8:]) % cfg.Modulus - lb.Set(cfg.TargetLabel, fmt.Sprintf("%d", mod)) + lb.Set(cfg.TargetLabel, strconv.FormatUint(mod, 10)) case LabelMap: lb.Range(func(l labels.Label) { if cfg.Regex.MatchString(l.Name) { diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index 6798fb02a..507ea101d 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -14,7 +14,7 @@ package relabel import ( - "fmt" + "strconv" "testing" "github.com/prometheus/common/model" @@ -657,7 +657,7 @@ func TestRelabelValidate(t *testing.T) { }, } for i, test := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { err := test.config.Validate() if test.expected == "" { require.NoError(t, err) diff --git a/model/rulefmt/rulefmt.go b/model/rulefmt/rulefmt.go index 4ed1619d6..bfb85ce74 100644 --- a/model/rulefmt/rulefmt.go +++ b/model/rulefmt/rulefmt.go @@ -136,10 +136,11 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { // RuleGroup is a list of sequentially evaluated recording and alerting rules. 
type RuleGroup struct { - Name string `yaml:"name"` - Interval model.Duration `yaml:"interval,omitempty"` - Limit int `yaml:"limit,omitempty"` - Rules []RuleNode `yaml:"rules"` + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + QueryOffset *model.Duration `yaml:"query_offset,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []RuleNode `yaml:"rules"` } // Rule describes an alerting or recording rule. diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index e7a9243bc..d2e72ca33 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -74,7 +74,7 @@ func TestHandlerNextBatch(t *testing.T) { for i := range make([]struct{}, 2*maxBatchSize+1) { h.queue = append(h.queue, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } @@ -186,10 +186,10 @@ func TestHandlerSendAll(t *testing.T) { for i := range make([]struct{}, maxBatchSize) { h.queue = append(h.queue, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) expected = append(expected, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } @@ -297,23 +297,23 @@ func TestHandlerSendAllRemapPerAm(t *testing.T) { for i := range make([]struct{}, maxBatchSize/2) { h.queue = append(h.queue, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }, &Alert{ - Labels: labels.FromStrings("alertname", "test", "alertnamedrop", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", "test", "alertnamedrop", strconv.Itoa(i)), }, ) expected1 = append(expected1, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }, &Alert{ - Labels: labels.FromStrings("alertname", "test", "alertnamedrop", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", "test", "alertnamedrop", strconv.Itoa(i)), }, ) expected2 = append(expected2, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } @@ -502,7 +502,7 @@ func TestHandlerQueuing(t *testing.T) { var alerts []*Alert for i := range make([]struct{}, 20*maxBatchSize) { alerts = append(alerts, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } @@ -762,7 +762,7 @@ func TestHangingNotifier(t *testing.T) { var alerts []*Alert for i := range make([]struct{}, 20*maxBatchSize) { alerts = append(alerts, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } diff --git a/promql/bench_test.go b/promql/bench_test.go index 516b0d748..9a8529091 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package promql +package promql_test import ( "context" @@ -23,13 +23,14 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/teststorage" ) -func setupRangeQueryTestData(stor *teststorage.TestStorage, _ *Engine, interval, numIntervals int) error { +func setupRangeQueryTestData(stor *teststorage.TestStorage, _ *promql.Engine, interval, numIntervals int) error { ctx := context.Background() metrics := []labels.Labels{} @@ -249,13 +250,13 @@ func BenchmarkRangeQuery(b *testing.B) { stor := teststorage.New(b) stor.DB.DisableCompactions() // Don't want auto-compaction disrupting timings. defer stor.Close() - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 50000000, Timeout: 100 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) const interval = 10000 // 10s interval. // A day of data plus 10k steps. @@ -324,7 +325,7 @@ func BenchmarkNativeHistograms(b *testing.B) { }, } - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 50000000, @@ -338,7 +339,7 @@ func BenchmarkNativeHistograms(b *testing.B) { for _, tc := range cases { b.Run(tc.name, func(b *testing.B) { - ng := NewEngine(opts) + ng := promql.NewEngine(opts) for i := 0; i < b.N; i++ { qry, err := ng.NewRangeQuery(context.Background(), testStorage, nil, tc.query, start, end, step) if err != nil { diff --git a/promql/engine.go b/promql/engine.go index b8a8ea095..ea4bc1af8 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -573,7 +573,8 @@ func (ng *Engine) validateOpts(expr parser.Expr) error { return validationErr } -func (ng *Engine) newTestQuery(f func(context.Context) error) Query { +// NewTestQuery: inject special behaviour into Query for testing. +func (ng *Engine) NewTestQuery(f func(context.Context) error) Query { qry := &query{ q: "test statement", stmt: parser.TestStmt(f), @@ -2024,25 +2025,21 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(vs *parser.Vec vec := make(Vector, 0, len(vs.Series)) for i, s := range vs.Series { it := seriesIterators[i] - t, f, h, ok := ev.vectorSelectorSingle(it, vs, enh.Ts) - if ok { - vec = append(vec, Sample{ - Metric: s.Labels(), - T: t, - F: f, - H: h, - }) - histSize := 0 - if h != nil { - histSize := h.Size() / 16 // 16 bytes per sample. - ev.currentSamples += histSize - } - ev.currentSamples++ + t, _, _, ok := ev.vectorSelectorSingle(it, vs, enh.Ts) + if !ok { + continue + } - ev.samplesStats.IncrementSamplesAtTimestamp(enh.Ts, int64(1+histSize)) - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } + // Note that we ignore the sample values because call only cares about the timestamp. 
+ vec = append(vec, Sample{ + Metric: s.Labels(), + T: t, + }) + + ev.currentSamples++ + ev.samplesStats.IncrementSamplesAtTimestamp(enh.Ts, 1) + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) } } ev.samplesStats.UpdatePeak(ev.currentSamples) diff --git a/promql/engine_internal_test.go b/promql/engine_internal_test.go new file mode 100644 index 000000000..cb501b2fd --- /dev/null +++ b/promql/engine_internal_test.go @@ -0,0 +1,82 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql + +import ( + "errors" + "testing" + + "github.com/go-kit/log" + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/util/annotations" +) + +func TestRecoverEvaluatorRuntime(t *testing.T) { + var output []interface{} + logger := log.Logger(log.LoggerFunc(func(keyvals ...interface{}) error { + output = append(output, keyvals...) + return nil + })) + ev := &evaluator{logger: logger} + + expr, _ := parser.ParseExpr("sum(up)") + + var err error + + defer func() { + require.EqualError(t, err, "unexpected error: runtime error: index out of range [123] with length 0") + require.Contains(t, output, "sum(up)") + }() + defer ev.recover(expr, nil, &err) + + // Cause a runtime panic. + var a []int + a[123] = 1 +} + +func TestRecoverEvaluatorError(t *testing.T) { + ev := &evaluator{logger: log.NewNopLogger()} + var err error + + e := errors.New("custom error") + + defer func() { + require.EqualError(t, err, e.Error()) + }() + defer ev.recover(nil, nil, &err) + + panic(e) +} + +func TestRecoverEvaluatorErrorWithWarnings(t *testing.T) { + ev := &evaluator{logger: log.NewNopLogger()} + var err error + var ws annotations.Annotations + + warnings := annotations.New().Add(errors.New("custom warning")) + e := errWithWarnings{ + err: errors.New("custom error"), + warnings: warnings, + } + + defer func() { + require.EqualError(t, err, e.Error()) + require.Equal(t, warnings, ws, "wrong warning message") + }() + defer ev.recover(nil, &ws, &err) + + panic(e) +} diff --git a/promql/engine_test.go b/promql/engine_test.go index 0202c15ae..b7435d473 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package promql +package promql_test import ( "context" @@ -20,27 +20,34 @@ import ( "math" "os" "sort" + "strconv" + "sync" "testing" "time" - "github.com/go-kit/log" - "github.com/stretchr/testify/require" "go.uber.org/goleak" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" - "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/annotations" "github.com/prometheus/prometheus/util/stats" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" ) +const ( + env = "query execution" + defaultLookbackDelta = 5 * time.Minute + defaultEpsilon = 0.000001 // Relative error allowed for sample values. +) + func TestMain(m *testing.M) { goleak.VerifyTestMain(m) } @@ -51,10 +58,12 @@ func TestQueryConcurrency(t *testing.T) { dir, err := os.MkdirTemp("", "test_concurrency") require.NoError(t, err) defer os.RemoveAll(dir) - queryTracker := NewActiveQueryTracker(dir, maxConcurrency, nil) - t.Cleanup(queryTracker.Close) + queryTracker := promql.NewActiveQueryTracker(dir, maxConcurrency, nil) + t.Cleanup(func() { + require.NoError(t, queryTracker.Close()) + }) - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, @@ -62,7 +71,7 @@ func TestQueryConcurrency(t *testing.T) { ActiveQueryTracker: queryTracker, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() @@ -84,9 +93,14 @@ func TestQueryConcurrency(t *testing.T) { return nil } + var wg sync.WaitGroup for i := 0; i < maxConcurrency; i++ { - q := engine.newTestQuery(f) - go q.Exec(ctx) + q := engine.NewTestQuery(f) + wg.Add(1) + go func() { + q.Exec(ctx) + wg.Done() + }() select { case <-processing: // Expected. @@ -95,8 +109,12 @@ func TestQueryConcurrency(t *testing.T) { } } - q := engine.newTestQuery(f) - go q.Exec(ctx) + q := engine.NewTestQuery(f) + wg.Add(1) + go func() { + q.Exec(ctx) + wg.Done() + }() select { case <-processing: @@ -119,20 +137,37 @@ func TestQueryConcurrency(t *testing.T) { for i := 0; i < maxConcurrency; i++ { block <- struct{}{} } + + wg.Wait() +} + +// contextDone returns an error if the context was canceled or timed out. 
+func contextDone(ctx context.Context, env string) error { + if err := ctx.Err(); err != nil { + switch { + case errors.Is(err, context.Canceled): + return promql.ErrQueryCanceled(env) + case errors.Is(err, context.DeadlineExceeded): + return promql.ErrQueryTimeout(env) + default: + return err + } + } + return nil } func TestQueryTimeout(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 5 * time.Millisecond, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() - query := engine.newTestQuery(func(ctx context.Context) error { + query := engine.NewTestQuery(func(ctx context.Context) error { time.Sleep(100 * time.Millisecond) return contextDone(ctx, "test statement execution") }) @@ -140,20 +175,20 @@ func TestQueryTimeout(t *testing.T) { res := query.Exec(ctx) require.Error(t, res.Err, "expected timeout error but got none") - var e ErrQueryTimeout + var e promql.ErrQueryTimeout require.ErrorAs(t, res.Err, &e, "expected timeout error but got: %s", res.Err) } -const errQueryCanceled = ErrQueryCanceled("test statement execution") +const errQueryCanceled = promql.ErrQueryCanceled("test statement execution") func TestQueryCancel(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 10 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() @@ -161,13 +196,13 @@ func TestQueryCancel(t *testing.T) { block := make(chan struct{}) processing := make(chan struct{}) - query1 := engine.newTestQuery(func(ctx context.Context) error { + query1 := engine.NewTestQuery(func(ctx context.Context) error { processing <- struct{}{} <-block return contextDone(ctx, "test statement execution") }) - var res *Result + var res *promql.Result go func() { res = query1.Exec(ctx) @@ -183,7 +218,7 @@ func TestQueryCancel(t *testing.T) { require.Equal(t, errQueryCanceled, res.Err) // Canceling a query before starting it must have no effect. 
- query2 := engine.newTestQuery(func(ctx context.Context) error { + query2 := engine.NewTestQuery(func(ctx context.Context) error { return contextDone(ctx, "test statement execution") }) @@ -221,14 +256,14 @@ func (e errSeriesSet) Err() error { return e.err } func (e errSeriesSet) Warnings() annotations.Annotations { return nil } func TestQueryError(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 10 * time.Second, } - engine := NewEngine(opts) - errStorage := ErrStorage{errors.New("storage error")} + engine := promql.NewEngine(opts) + errStorage := promql.ErrStorage{errors.New("storage error")} queryable := storage.QueryableFunc(func(mint, maxt int64) (storage.Querier, error) { return &errQuerier{err: errStorage}, nil }) @@ -270,7 +305,7 @@ func (h *hintRecordingQuerier) Select(ctx context.Context, sortSeries bool, hint } func TestSelectHintsSetCorrectly(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, @@ -561,11 +596,11 @@ func TestSelectHintsSetCorrectly(t *testing.T) { }, } { t.Run(tc.query, func(t *testing.T) { - engine := NewEngine(opts) + engine := promql.NewEngine(opts) hintsRecorder := &noopHintRecordingQueryable{} var ( - query Query + query promql.Query err error ) ctx := context.Background() @@ -586,13 +621,13 @@ func TestSelectHintsSetCorrectly(t *testing.T) { } func TestEngineShutdown(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 10 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) ctx, cancelCtx := context.WithCancel(context.Background()) block := make(chan struct{}) @@ -605,13 +640,13 @@ func TestEngineShutdown(t *testing.T) { <-block return contextDone(ctx, "test statement execution") } - query1 := engine.newTestQuery(f) + query1 := engine.NewTestQuery(f) // Stopping the engine must cancel the base context. While executing queries is // still possible, their context is canceled from the beginning and execution should // terminate immediately. - var res *Result + var res *promql.Result go func() { res = query1.Exec(ctx) processing <- struct{}{} @@ -625,7 +660,7 @@ func TestEngineShutdown(t *testing.T) { require.Error(t, res.Err, "expected error on shutdown during query but got none") require.Equal(t, errQueryCanceled, res.Err) - query2 := engine.newTestQuery(func(context.Context) error { + query2 := engine.NewTestQuery(func(context.Context) error { require.FailNow(t, "reached query execution unexpectedly") return nil }) @@ -635,12 +670,12 @@ func TestEngineShutdown(t *testing.T) { res2 := query2.Exec(ctx) require.Error(t, res2.Err, "expected error on querying with canceled context but got none") - var e ErrQueryCanceled + var e promql.ErrQueryCanceled require.ErrorAs(t, res2.Err, &e, "expected cancellation error but got: %s", res2.Err) } func TestEngineEvalStmtTimestamps(t *testing.T) { - storage := LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 10s metric 1 2 `) @@ -657,13 +692,13 @@ load 10s // Instant queries. 
{ Query: "1", - Result: Scalar{V: 1, T: 1000}, + Result: promql.Scalar{V: 1, T: 1000}, Start: time.Unix(1, 0), }, { Query: "metric", - Result: Vector{ - Sample{ + Result: promql.Vector{ + promql.Sample{ F: 1, T: 1000, Metric: labels.FromStrings("__name__", "metric"), @@ -673,9 +708,9 @@ load 10s }, { Query: "metric[20s]", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 2, T: 10000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 2, T: 10000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -684,9 +719,9 @@ load 10s // Range queries. { Query: "1", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 1, T: 1000}, {F: 1, T: 2000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 1000}, {F: 1, T: 2000}}, Metric: labels.EmptyLabels(), }, }, @@ -696,9 +731,9 @@ load 10s }, { Query: "metric", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 1, T: 1000}, {F: 1, T: 2000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 1000}, {F: 1, T: 2000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -708,9 +743,9 @@ load 10s }, { Query: "metric", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -727,7 +762,7 @@ load 10s for i, c := range cases { t.Run(fmt.Sprintf("%d query=%s", i, c.Query), func(t *testing.T) { var err error - var qry Query + var qry promql.Query engine := newTestEngine() if c.Interval == 0 { qry, err = engine.NewInstantQuery(context.Background(), storage, nil, c.Query, c.Start) @@ -749,7 +784,7 @@ load 10s } func TestQueryStatistics(t *testing.T) { - storage := LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 10s metricWith1SampleEvery10Seconds 1+1x100 metricWith3SampleEvery10Seconds{a="1",b="1"} 1+1x100 @@ -818,8 +853,8 @@ load 10s { Query: "timestamp(metricWith1HistogramEvery10Seconds)", Start: time.Unix(21, 0), - PeakSamples: 13, // histogram size 12 + 1 extra because of timestamp - TotalSamples: 1, // 1 float sample (because of timestamp) / 10 seconds + PeakSamples: 2, + TotalSamples: 1, // 1 float sample (because of timestamp) / 10 seconds TotalSamplesPerStep: stats.TotalSamplesPerStep{ 21000: 1, }, @@ -1116,7 +1151,7 @@ load 10s Start: time.Unix(201, 0), End: time.Unix(220, 0), Interval: 5 * time.Second, - PeakSamples: 16, + PeakSamples: 5, TotalSamples: 4, // 1 sample per query * 4 steps TotalSamplesPerStep: stats.TotalSamplesPerStep{ 201000: 1, @@ -1267,17 +1302,14 @@ load 10s }, } - engine := newTestEngine() - engine.enablePerStepStats = true - origMaxSamples := engine.maxSamplesPerQuery for _, c := range cases { t.Run(c.Query, func(t *testing.T) { - opts := NewPrometheusQueryOpts(true, 0) - engine.maxSamplesPerQuery = origMaxSamples + opts := promql.NewPrometheusQueryOpts(true, 0) + engine := promqltest.NewTestEngine(true, 0, promqltest.DefaultMaxSamplesPerQuery) runQuery := func(expErr error) *stats.Statistics { var err error - var qry Query + var qry promql.Query if c.Interval == 0 { qry, err = engine.NewInstantQuery(context.Background(), storage, opts, c.Query, c.Start) } else { @@ -1300,14 +1332,14 @@ load 10s if c.SkipMaxCheck { return } - engine.maxSamplesPerQuery = stats.Samples.PeakSamples - 1 - runQuery(ErrTooManySamples(env)) + engine = 
promqltest.NewTestEngine(true, 0, stats.Samples.PeakSamples-1) + runQuery(promql.ErrTooManySamples(env)) }) } } func TestMaxQuerySamples(t *testing.T) { - storage := LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 10s metric 1+1x100 bigmetric{a="1"} 1+1x100 @@ -1423,7 +1455,7 @@ load 10s Interval: 5 * time.Second, }, { - // Sample as above but with only 1 part as step invariant. + // promql.Sample as above but with only 1 part as step invariant. // Here the peak is caused by the non-step invariant part as it touches more time range. // Hence at peak it is 2*21 (subquery from 0s to 20s) // + 11 (buffer of a series per evaluation) @@ -1456,7 +1488,7 @@ load 10s engine := newTestEngine() testFunc := func(expError error) { var err error - var qry Query + var qry promql.Query if c.Interval == 0 { qry, err = engine.NewInstantQuery(context.Background(), storage, nil, c.Query, c.Start) } else { @@ -1474,19 +1506,19 @@ load 10s } // Within limit. - engine.maxSamplesPerQuery = c.MaxSamples + engine = promqltest.NewTestEngine(false, 0, c.MaxSamples) testFunc(nil) // Exceeding limit. - engine.maxSamplesPerQuery = c.MaxSamples - 1 - testFunc(ErrTooManySamples(env)) + engine = promqltest.NewTestEngine(false, 0, c.MaxSamples-1) + testFunc(promql.ErrTooManySamples(env)) }) } } func TestAtModifier(t *testing.T) { engine := newTestEngine() - storage := LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 10s metric{job="1"} 0+1x1000 metric{job="2"} 0+2x1000 @@ -1530,137 +1562,137 @@ load 1ms { // Time of the result is the evaluation time. query: `metric_neg @ 0`, start: 100, - result: Vector{ - Sample{F: 1, T: 100000, Metric: lblsneg}, + result: promql.Vector{ + promql.Sample{F: 1, T: 100000, Metric: lblsneg}, }, }, { query: `metric_neg @ -200`, start: 100, - result: Vector{ - Sample{F: 201, T: 100000, Metric: lblsneg}, + result: promql.Vector{ + promql.Sample{F: 201, T: 100000, Metric: lblsneg}, }, }, { query: `metric{job="2"} @ 50`, start: -2, end: 2, interval: 1, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 10, T: -2000}, {F: 10, T: -1000}, {F: 10, T: 0}, {F: 10, T: 1000}, {F: 10, T: 2000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 10, T: -2000}, {F: 10, T: -1000}, {F: 10, T: 0}, {F: 10, T: 1000}, {F: 10, T: 2000}}, Metric: lbls2, }, }, }, { // Timestamps for matrix selector does not depend on the evaluation time. 
query: "metric[20s] @ 300", start: 10, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 28, T: 280000}, {F: 29, T: 290000}, {F: 30, T: 300000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 28, T: 280000}, {F: 29, T: 290000}, {F: 30, T: 300000}}, Metric: lbls1, }, - Series{ - Floats: []FPoint{{F: 56, T: 280000}, {F: 58, T: 290000}, {F: 60, T: 300000}}, + promql.Series{ + Floats: []promql.FPoint{{F: 56, T: 280000}, {F: 58, T: 290000}, {F: 60, T: 300000}}, Metric: lbls2, }, }, }, { query: `metric_neg[2s] @ 0`, start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 3, T: -2000}, {F: 2, T: -1000}, {F: 1, T: 0}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 3, T: -2000}, {F: 2, T: -1000}, {F: 1, T: 0}}, Metric: lblsneg, }, }, }, { query: `metric_neg[3s] @ -500`, start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 504, T: -503000}, {F: 503, T: -502000}, {F: 502, T: -501000}, {F: 501, T: -500000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 504, T: -503000}, {F: 503, T: -502000}, {F: 502, T: -501000}, {F: 501, T: -500000}}, Metric: lblsneg, }, }, }, { query: `metric_ms[3ms] @ 2.345`, start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 2342, T: 2342}, {F: 2343, T: 2343}, {F: 2344, T: 2344}, {F: 2345, T: 2345}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 2342, T: 2342}, {F: 2343, T: 2343}, {F: 2344, T: 2344}, {F: 2345, T: 2345}}, Metric: lblsms, }, }, }, { query: "metric[100s:25s] @ 300", start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 20, T: 200000}, {F: 22, T: 225000}, {F: 25, T: 250000}, {F: 27, T: 275000}, {F: 30, T: 300000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 20, T: 200000}, {F: 22, T: 225000}, {F: 25, T: 250000}, {F: 27, T: 275000}, {F: 30, T: 300000}}, Metric: lbls1, }, - Series{ - Floats: []FPoint{{F: 40, T: 200000}, {F: 44, T: 225000}, {F: 50, T: 250000}, {F: 54, T: 275000}, {F: 60, T: 300000}}, + promql.Series{ + Floats: []promql.FPoint{{F: 40, T: 200000}, {F: 44, T: 225000}, {F: 50, T: 250000}, {F: 54, T: 275000}, {F: 60, T: 300000}}, Metric: lbls2, }, }, }, { query: "metric_neg[50s:25s] @ 0", start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 51, T: -50000}, {F: 26, T: -25000}, {F: 1, T: 0}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 51, T: -50000}, {F: 26, T: -25000}, {F: 1, T: 0}}, Metric: lblsneg, }, }, }, { query: "metric_neg[50s:25s] @ -100", start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 151, T: -150000}, {F: 126, T: -125000}, {F: 101, T: -100000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 151, T: -150000}, {F: 126, T: -125000}, {F: 101, T: -100000}}, Metric: lblsneg, }, }, }, { query: `metric_ms[100ms:25ms] @ 2.345`, start: 100, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 2250, T: 2250}, {F: 2275, T: 2275}, {F: 2300, T: 2300}, {F: 2325, T: 2325}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 2250, T: 2250}, {F: 2275, T: 2275}, {F: 2300, T: 2300}, {F: 2325, T: 2325}}, Metric: lblsms, }, }, }, { query: `metric_topk and topk(1, sum_over_time(metric_topk[50s] @ 100))`, start: 50, end: 80, interval: 10, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 995, T: 50000}, {F: 994, T: 60000}, {F: 993, T: 70000}, {F: 992, T: 80000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 995, T: 50000}, {F: 994, T: 60000}, {F: 993, T: 70000}, {F: 992, T: 
80000}}, Metric: lblstopk3, }, }, }, { query: `metric_topk and topk(1, sum_over_time(metric_topk[50s] @ 5000))`, start: 50, end: 80, interval: 10, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 10, T: 50000}, {F: 12, T: 60000}, {F: 14, T: 70000}, {F: 16, T: 80000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 10, T: 50000}, {F: 12, T: 60000}, {F: 14, T: 70000}, {F: 16, T: 80000}}, Metric: lblstopk2, }, }, }, { query: `metric_topk and topk(1, sum_over_time(metric_topk[50s] @ end()))`, start: 70, end: 100, interval: 10, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 993, T: 70000}, {F: 992, T: 80000}, {F: 991, T: 90000}, {F: 990, T: 100000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 993, T: 70000}, {F: 992, T: 80000}, {F: 991, T: 90000}, {F: 990, T: 100000}}, Metric: lblstopk3, }, }, }, { query: `metric_topk and topk(1, sum_over_time(metric_topk[50s] @ start()))`, start: 100, end: 130, interval: 10, - result: Matrix{ - Series{ - Floats: []FPoint{{F: 990, T: 100000}, {F: 989, T: 110000}, {F: 988, T: 120000}, {F: 987, T: 130000}}, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 990, T: 100000}, {F: 989, T: 110000}, {F: 988, T: 120000}, {F: 987, T: 130000}}, Metric: lblstopk3, }, }, @@ -1669,9 +1701,9 @@ load 1ms // The trick here is that the query range should be > lookback delta. query: `timestamp(metric_timestamp @ 3600)`, start: 0, end: 7 * 60, interval: 60, - result: Matrix{ - Series{ - Floats: []FPoint{ + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{ {F: 3600, T: 0}, {F: 3600, T: 60 * 1000}, {F: 3600, T: 2 * 60 * 1000}, @@ -1694,7 +1726,7 @@ load 1ms } start, end, interval := time.Unix(c.start, 0), time.Unix(c.end, 0), time.Duration(c.interval)*time.Second var err error - var qry Query + var qry promql.Query if c.end == 0 { qry, err = engine.NewInstantQuery(context.Background(), storage, nil, c.query, start) } else { @@ -1704,76 +1736,19 @@ load 1ms res := qry.Exec(context.Background()) require.NoError(t, res.Err) - if expMat, ok := c.result.(Matrix); ok { + if expMat, ok := c.result.(promql.Matrix); ok { sort.Sort(expMat) - sort.Sort(res.Value.(Matrix)) + sort.Sort(res.Value.(promql.Matrix)) } testutil.RequireEqual(t, c.result, res.Value, "query %q failed", c.query) }) } } -func TestRecoverEvaluatorRuntime(t *testing.T) { - var output []interface{} - logger := log.Logger(log.LoggerFunc(func(keyvals ...interface{}) error { - output = append(output, keyvals...) - return nil - })) - ev := &evaluator{logger: logger} - - expr, _ := parser.ParseExpr("sum(up)") - - var err error - - defer func() { - require.EqualError(t, err, "unexpected error: runtime error: index out of range [123] with length 0") - require.Contains(t, output, "sum(up)") - }() - defer ev.recover(expr, nil, &err) - - // Cause a runtime panic. 
- var a []int - a[123] = 1 -} - -func TestRecoverEvaluatorError(t *testing.T) { - ev := &evaluator{logger: log.NewNopLogger()} - var err error - - e := errors.New("custom error") - - defer func() { - require.EqualError(t, err, e.Error()) - }() - defer ev.recover(nil, nil, &err) - - panic(e) -} - -func TestRecoverEvaluatorErrorWithWarnings(t *testing.T) { - ev := &evaluator{logger: log.NewNopLogger()} - var err error - var ws annotations.Annotations - - warnings := annotations.New().Add(errors.New("custom warning")) - e := errWithWarnings{ - err: errors.New("custom error"), - warnings: warnings, - } - - defer func() { - require.EqualError(t, err, e.Error()) - require.Equal(t, warnings, ws, "wrong warning message") - }() - defer ev.recover(nil, &ws, &err) - - panic(e) -} - func TestSubquerySelector(t *testing.T) { type caseType struct { Query string - Result Result + Result promql.Result Start time.Time } @@ -1787,11 +1762,11 @@ func TestSubquerySelector(t *testing.T) { cases: []caseType{ { Query: "metric[20s:10s]", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 2, T: 10000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 2, T: 10000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1801,11 +1776,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s]", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1815,11 +1790,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s] offset 2s", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1829,11 +1804,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s] offset 6s", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 1, T: 5000}, {F: 2, T: 10000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1843,11 +1818,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s] offset 4s", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1857,11 +1832,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s] offset 5s", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1871,11 +1846,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: 
"metric[20s:5s] offset 6s", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1885,11 +1860,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: "metric[20s:5s] offset 7s", - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1908,11 +1883,11 @@ func TestSubquerySelector(t *testing.T) { cases: []caseType{ { // Normal selector. Query: `http_requests{group=~"pro.*",instance="0"}[30s:10s]`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 9990, T: 9990000}, {F: 10000, T: 10000000}, {F: 100, T: 10010000}, {F: 130, T: 10020000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 9990, T: 9990000}, {F: 10000, T: 10000000}, {F: 100, T: 10010000}, {F: 130, T: 10020000}}, Metric: labels.FromStrings("__name__", "http_requests", "job", "api-server", "instance", "0", "group", "production"), }, }, @@ -1922,11 +1897,11 @@ func TestSubquerySelector(t *testing.T) { }, { // Default step. Query: `http_requests{group=~"pro.*",instance="0"}[5m:]`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 9840, T: 9840000}, {F: 9900, T: 9900000}, {F: 9960, T: 9960000}, {F: 130, T: 10020000}, {F: 310, T: 10080000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 9840, T: 9840000}, {F: 9900, T: 9900000}, {F: 9960, T: 9960000}, {F: 130, T: 10020000}, {F: 310, T: 10080000}}, Metric: labels.FromStrings("__name__", "http_requests", "job", "api-server", "instance", "0", "group", "production"), }, }, @@ -1936,11 +1911,11 @@ func TestSubquerySelector(t *testing.T) { }, { // Checking if high offset (>LookbackDelta) is being taken care of. 
Query: `http_requests{group=~"pro.*",instance="0"}[5m:] offset 20m`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 8640, T: 8640000}, {F: 8700, T: 8700000}, {F: 8760, T: 8760000}, {F: 8820, T: 8820000}, {F: 8880, T: 8880000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 8640, T: 8640000}, {F: 8700, T: 8700000}, {F: 8760, T: 8760000}, {F: 8820, T: 8820000}, {F: 8880, T: 8880000}}, Metric: labels.FromStrings("__name__", "http_requests", "job", "api-server", "instance", "0", "group", "production"), }, }, @@ -1950,23 +1925,23 @@ func TestSubquerySelector(t *testing.T) { }, { Query: `rate(http_requests[1m])[15s:5s]`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 3, T: 7985000}, {F: 3, T: 7990000}, {F: 3, T: 7995000}, {F: 3, T: 8000000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 3, T: 7985000}, {F: 3, T: 7990000}, {F: 3, T: 7995000}, {F: 3, T: 8000000}}, Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "canary"), }, - Series{ - Floats: []FPoint{{F: 4, T: 7985000}, {F: 4, T: 7990000}, {F: 4, T: 7995000}, {F: 4, T: 8000000}}, + promql.Series{ + Floats: []promql.FPoint{{F: 4, T: 7985000}, {F: 4, T: 7990000}, {F: 4, T: 7995000}, {F: 4, T: 8000000}}, Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "canary"), }, - Series{ - Floats: []FPoint{{F: 1, T: 7985000}, {F: 1, T: 7990000}, {F: 1, T: 7995000}, {F: 1, T: 8000000}}, + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 7985000}, {F: 1, T: 7990000}, {F: 1, T: 7995000}, {F: 1, T: 8000000}}, Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "production"), }, - Series{ - Floats: []FPoint{{F: 2, T: 7985000}, {F: 2, T: 7990000}, {F: 2, T: 7995000}, {F: 2, T: 8000000}}, + promql.Series{ + Floats: []promql.FPoint{{F: 2, T: 7985000}, {F: 2, T: 7990000}, {F: 2, T: 7995000}, {F: 2, T: 8000000}}, Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "production"), }, }, @@ -1976,11 +1951,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: `sum(http_requests{group=~"pro.*"})[30s:10s]`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 270, T: 90000}, {F: 300, T: 100000}, {F: 330, T: 110000}, {F: 360, T: 120000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 270, T: 90000}, {F: 300, T: 100000}, {F: 330, T: 110000}, {F: 360, T: 120000}}, Metric: labels.EmptyLabels(), }, }, @@ -1990,11 +1965,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: `sum(http_requests)[40s:10s]`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 800, T: 80000}, {F: 900, T: 90000}, {F: 1000, T: 100000}, {F: 1100, T: 110000}, {F: 1200, T: 120000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 800, T: 80000}, {F: 900, T: 90000}, {F: 1000, T: 100000}, {F: 1100, T: 110000}, {F: 1200, T: 120000}}, Metric: labels.EmptyLabels(), }, }, @@ -2004,11 +1979,11 @@ func TestSubquerySelector(t *testing.T) { }, { Query: `(sum(http_requests{group=~"p.*"})+sum(http_requests{group=~"c.*"}))[20s:5s]`, - Result: Result{ + Result: promql.Result{ nil, - Matrix{ - Series{ - Floats: []FPoint{{F: 1000, T: 100000}, {F: 1000, T: 105000}, {F: 1100, T: 110000}, {F: 1100, T: 115000}, {F: 1200, T: 120000}}, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1000, T: 100000}, {F: 1000, T: 105000}, {F: 1100, T: 110000}, {F: 1100, T: 115000}, {F: 1200, 
T: 120000}}, Metric: labels.EmptyLabels(), }, }, @@ -2021,7 +1996,7 @@ func TestSubquerySelector(t *testing.T) { } { t.Run("", func(t *testing.T) { engine := newTestEngine() - storage := LoadedStorage(t, tst.loadString) + storage := promqltest.LoadedStorage(t, tst.loadString) t.Cleanup(func() { storage.Close() }) for _, c := range tst.cases { @@ -2031,7 +2006,7 @@ func TestSubquerySelector(t *testing.T) { res := qry.Exec(context.Background()) require.Equal(t, c.Result.Err, res.Err) - mat := res.Value.(Matrix) + mat := res.Value.(promql.Matrix) sort.Sort(mat) testutil.RequireEqual(t, c.Result.Value, mat) }) @@ -2042,7 +2017,7 @@ func TestSubquerySelector(t *testing.T) { func TestTimestampFunction_StepsMoreOftenThanSamples(t *testing.T) { engine := newTestEngine() - storage := LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m metric 0+1x1000 `) @@ -2054,20 +2029,20 @@ load 1m interval := time.Second // We expect the value to be 0 for t=0s to t=59s (inclusive), then 60 for t=60s and t=61s. - expectedPoints := []FPoint{} + expectedPoints := []promql.FPoint{} for t := 0; t <= 59; t++ { - expectedPoints = append(expectedPoints, FPoint{F: 0, T: int64(t * 1000)}) + expectedPoints = append(expectedPoints, promql.FPoint{F: 0, T: int64(t * 1000)}) } expectedPoints = append( expectedPoints, - FPoint{F: 60, T: 60_000}, - FPoint{F: 60, T: 61_000}, + promql.FPoint{F: 60, T: 60_000}, + promql.FPoint{F: 60, T: 61_000}, ) - expectedResult := Matrix{ - Series{ + expectedResult := promql.Matrix{ + promql.Series{ Floats: expectedPoints, Metric: labels.EmptyLabels(), }, @@ -2104,25 +2079,25 @@ func (f *FakeQueryLogger) Log(l ...interface{}) error { } func TestQueryLogger_basic(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 10 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) queryExec := func() { ctx, cancelCtx := context.WithCancel(context.Background()) defer cancelCtx() - query := engine.newTestQuery(func(ctx context.Context) error { + query := engine.NewTestQuery(func(ctx context.Context) error { return contextDone(ctx, "test statement execution") }) res := query.Exec(ctx) require.NoError(t, res.Err) } - // Query works without query log initialized. + // promql.Query works without query log initialized. 
queryExec() f1 := NewFakeQueryLogger() @@ -2155,21 +2130,21 @@ func TestQueryLogger_basic(t *testing.T) { } func TestQueryLogger_fields(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 10 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) f1 := NewFakeQueryLogger() engine.SetQueryLogger(f1) ctx, cancelCtx := context.WithCancel(context.Background()) - ctx = NewOriginContext(ctx, map[string]interface{}{"foo": "bar"}) + ctx = promql.NewOriginContext(ctx, map[string]interface{}{"foo": "bar"}) defer cancelCtx() - query := engine.newTestQuery(func(ctx context.Context) error { + query := engine.NewTestQuery(func(ctx context.Context) error { return contextDone(ctx, "test statement execution") }) @@ -2184,22 +2159,22 @@ func TestQueryLogger_fields(t *testing.T) { } func TestQueryLogger_error(t *testing.T) { - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10, Timeout: 10 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) f1 := NewFakeQueryLogger() engine.SetQueryLogger(f1) ctx, cancelCtx := context.WithCancel(context.Background()) - ctx = NewOriginContext(ctx, map[string]interface{}{"foo": "bar"}) + ctx = promql.NewOriginContext(ctx, map[string]interface{}{"foo": "bar"}) defer cancelCtx() testErr := errors.New("failure") - query := engine.newTestQuery(func(ctx context.Context) error { + query := engine.NewTestQuery(func(ctx context.Context) error { return testErr }) @@ -3005,7 +2980,7 @@ func TestPreprocessAndWrapWithStepInvariantExpr(t *testing.T) { t.Run(test.input, func(t *testing.T) { expr, err := parser.ParseExpr(test.input) require.NoError(t, err) - expr = PreprocessExpr(expr, startTime, endTime) + expr = promql.PreprocessExpr(expr, startTime, endTime) if test.outputTest { require.Equal(t, test.input, expr.String(), "error on input '%s'", test.input) } @@ -3016,64 +2991,64 @@ func TestPreprocessAndWrapWithStepInvariantExpr(t *testing.T) { func TestEngineOptsValidation(t *testing.T) { cases := []struct { - opts EngineOpts + opts promql.EngineOpts query string fail bool expError error }{ { - opts: EngineOpts{EnableAtModifier: false}, - query: "metric @ 100", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "metric @ 100", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "rate(metric[1m] @ 100)", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "rate(metric[1m] @ 100)", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "rate(metric[1h:1m] @ 100)", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "rate(metric[1h:1m] @ 100)", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "metric @ start()", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "metric @ start()", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "rate(metric[1m] @ start())", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "rate(metric[1m] @ start())", fail: true, 
expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "rate(metric[1h:1m] @ start())", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "rate(metric[1h:1m] @ start())", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "metric @ end()", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "metric @ end()", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "rate(metric[1m] @ end())", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "rate(metric[1m] @ end())", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: false}, - query: "rate(metric[1h:1m] @ end())", fail: true, expError: ErrValidationAtModifierDisabled, + opts: promql.EngineOpts{EnableAtModifier: false}, + query: "rate(metric[1h:1m] @ end())", fail: true, expError: promql.ErrValidationAtModifierDisabled, }, { - opts: EngineOpts{EnableAtModifier: true}, + opts: promql.EngineOpts{EnableAtModifier: true}, query: "metric @ 100", }, { - opts: EngineOpts{EnableAtModifier: true}, + opts: promql.EngineOpts{EnableAtModifier: true}, query: "rate(metric[1m] @ start())", }, { - opts: EngineOpts{EnableAtModifier: true}, + opts: promql.EngineOpts{EnableAtModifier: true}, query: "rate(metric[1h:1m] @ end())", }, { - opts: EngineOpts{EnableNegativeOffset: false}, - query: "metric offset -1s", fail: true, expError: ErrValidationNegativeOffsetDisabled, + opts: promql.EngineOpts{EnableNegativeOffset: false}, + query: "metric offset -1s", fail: true, expError: promql.ErrValidationNegativeOffsetDisabled, }, { - opts: EngineOpts{EnableNegativeOffset: true}, + opts: promql.EngineOpts{EnableNegativeOffset: true}, query: "metric offset -1s", }, { - opts: EngineOpts{EnableAtModifier: true, EnableNegativeOffset: true}, + opts: promql.EngineOpts{EnableAtModifier: true, EnableNegativeOffset: true}, query: "metric @ 100 offset -2m", }, { - opts: EngineOpts{EnableAtModifier: true, EnableNegativeOffset: true}, + opts: promql.EngineOpts{EnableAtModifier: true, EnableNegativeOffset: true}, query: "metric offset -2m @ 100", }, } for _, c := range cases { - eng := NewEngine(c.opts) + eng := promql.NewEngine(c.opts) _, err1 := eng.NewInstantQuery(context.Background(), nil, nil, c.query, time.Unix(10, 0)) _, err2 := eng.NewRangeQuery(context.Background(), nil, nil, c.query, time.Unix(0, 0), time.Unix(10, 0), time.Second) if c.fail { @@ -3101,9 +3076,9 @@ func TestRangeQuery(t *testing.T) { Load: `load 30s bar 0 1 10 100 1000`, Query: "sum_over_time(bar[30s])", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}}, Metric: labels.EmptyLabels(), }, }, @@ -3116,9 +3091,9 @@ func TestRangeQuery(t *testing.T) { Load: `load 30s bar 0 1 10 100 1000 0 0 0 0`, Query: "sum_over_time(bar[30s])", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}}, Metric: labels.EmptyLabels(), }, }, @@ -3131,9 +3106,9 @@ 
func TestRangeQuery(t *testing.T) { Load: `load 30s bar 0 1 10 100 1000 10000 100000 1000000 10000000`, Query: "sum_over_time(bar[30s])", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}, {F: 110000, T: 180000}, {F: 11000000, T: 240000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 0, T: 0}, {F: 11, T: 60000}, {F: 1100, T: 120000}, {F: 110000, T: 180000}, {F: 11000000, T: 240000}}, Metric: labels.EmptyLabels(), }, }, @@ -3146,9 +3121,9 @@ func TestRangeQuery(t *testing.T) { Load: `load 30s bar 5 17 42 2 7 905 51`, Query: "sum_over_time(bar[30s])", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 5, T: 0}, {F: 59, T: 60000}, {F: 9, T: 120000}, {F: 956, T: 180000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 5, T: 0}, {F: 59, T: 60000}, {F: 9, T: 120000}, {F: 956, T: 180000}}, Metric: labels.EmptyLabels(), }, }, @@ -3161,9 +3136,9 @@ func TestRangeQuery(t *testing.T) { Load: `load 30s metric 1+1x4`, Query: "metric", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -3176,9 +3151,9 @@ func TestRangeQuery(t *testing.T) { Load: `load 30s metric 1+1x8`, Query: "metric", - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -3192,16 +3167,16 @@ func TestRangeQuery(t *testing.T) { foo{job="1"} 1+1x4 bar{job="2"} 1+1x4`, Query: `foo > 2 or bar`, - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 0}, {F: 3, T: 60000}, {F: 5, T: 120000}}, Metric: labels.FromStrings( "__name__", "bar", "job", "2", ), }, - Series{ - Floats: []FPoint{{F: 3, T: 60000}, {F: 5, T: 120000}}, + promql.Series{ + Floats: []promql.FPoint{{F: 3, T: 60000}, {F: 5, T: 120000}}, Metric: labels.FromStrings( "__name__", "foo", "job", "1", @@ -3217,9 +3192,9 @@ func TestRangeQuery(t *testing.T) { Load: `load 30s requests{job="1", __address__="bar"} 100`, Query: `requests * 2`, - Result: Matrix{ - Series{ - Floats: []FPoint{{F: 200, T: 0}, {F: 200, T: 60000}, {F: 200, T: 120000}}, + Result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 200, T: 0}, {F: 200, T: 60000}, {F: 200, T: 120000}}, Metric: labels.FromStrings( "__address__", "bar", "job", "1", @@ -3234,7 +3209,7 @@ func TestRangeQuery(t *testing.T) { for _, c := range cases { t.Run(c.Name, func(t *testing.T) { engine := newTestEngine() - storage := LoadedStorage(t, c.Load) + storage := promqltest.LoadedStorage(t, c.Load) t.Cleanup(func() { storage.Close() }) qry, err := engine.NewRangeQuery(context.Background(), storage, nil, c.Query, c.Start, c.End, c.Interval) @@ -3247,1076 +3222,6 @@ func TestRangeQuery(t *testing.T) { } } -func TestNativeHistogramRate(t *testing.T) { - // TODO(beorn7): Integrate histograms into the PromQL testing framework - // and write more tests there. 
- engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - app := storage.Appender(context.Background()) - for i, h := range tsdbutil.GenerateTestHistograms(100) { - _, err := app.AppendHistogram(0, lbls, int64(i)*int64(15*time.Second/time.Millisecond), h, nil) - require.NoError(t, err) - } - require.NoError(t, app.Commit()) - - queryString := fmt.Sprintf("rate(%s[45s])", seriesName) - t.Run("instant_query", func(t *testing.T) { - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(int64(5*time.Minute/time.Millisecond))) - require.NoError(t, err) - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - vector, err := res.Vector() - require.NoError(t, err) - require.Len(t, vector, 1) - actualHistogram := vector[0].H - expectedHistogram := &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.2266666666666663, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - } - require.Equal(t, expectedHistogram, actualHistogram) - }) - - t.Run("range_query", func(t *testing.T) { - step := 30 * time.Second - start := timestamp.Time(int64(5 * time.Minute / time.Millisecond)) - end := start.Add(step) - qry, err := engine.NewRangeQuery(context.Background(), storage, nil, queryString, start, end, step) - require.NoError(t, err) - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - matrix, err := res.Matrix() - require.NoError(t, err) - require.Len(t, matrix, 1) - require.Len(t, matrix[0].Histograms, 2) - actualHistograms := matrix[0].Histograms - expectedHistograms := []HPoint{{ - T: 300000, - H: &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.2266666666666663, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - }, - }, { - T: 330000, - H: &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.2266666666666663, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - }, - }} - require.Equal(t, expectedHistograms, actualHistograms) - }) -} - -func TestNativeFloatHistogramRate(t *testing.T) { - // TODO(beorn7): Integrate histograms into the PromQL testing framework - // and write more tests there. 
- engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - app := storage.Appender(context.Background()) - for i, fh := range tsdbutil.GenerateTestFloatHistograms(100) { - _, err := app.AppendHistogram(0, lbls, int64(i)*int64(15*time.Second/time.Millisecond), nil, fh) - require.NoError(t, err) - } - require.NoError(t, app.Commit()) - - queryString := fmt.Sprintf("rate(%s[1m])", seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(int64(5*time.Minute/time.Millisecond))) - require.NoError(t, err) - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - vector, err := res.Vector() - require.NoError(t, err) - require.Len(t, vector, 1) - actualHistogram := vector[0].H - expectedHistogram := &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.226666666666667, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - } - require.Equal(t, expectedHistogram, actualHistogram) -} - -func TestNativeHistogram_HistogramCountAndSum(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - h := &histogram.Histogram{ - Count: 24, - ZeroCount: 4, - ZeroThreshold: 0.001, - Sum: 100, - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - } - for _, floatHisto := range []bool{true, false} { - t.Run(fmt.Sprintf("floatHistogram=%t", floatHisto), func(t *testing.T) { - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - ts := int64(10 * time.Minute / time.Millisecond) - app := storage.Appender(context.Background()) - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, h.ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, h, nil) - } - require.NoError(t, err) - require.NoError(t, app.Commit()) - - queryString := fmt.Sprintf("histogram_count(%s)", seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - if floatHisto { - require.Equal(t, h.ToFloat(nil).Count, vector[0].F) - } else { - require.Equal(t, float64(h.Count), vector[0].F) - } - - queryString = fmt.Sprintf("histogram_sum(%s)", seriesName) - qry, err = engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res = qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err = res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, 
vector[0].H) - if floatHisto { - require.Equal(t, h.ToFloat(nil).Sum, vector[0].F) - } else { - require.Equal(t, h.Sum, vector[0].F) - } - }) - } -} - -func TestNativeHistogram_HistogramStdDevVar(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - testCases := []struct { - name string - h *histogram.Histogram - stdVar float64 - }{ - { - name: "1, 2, 3, 4 low-res", - h: &histogram.Histogram{ - Count: 4, - Sum: 10, - Schema: 2, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 1}, - {Offset: 3, Length: 1}, - {Offset: 2, Length: 2}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - }, - stdVar: 1.163807968526718, // actual variance: 1.25 - }, - { - name: "1, 2, 3, 4 hi-res", - h: &histogram.Histogram{ - Count: 4, - Sum: 10, - Schema: 8, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 1}, - {Offset: 255, Length: 1}, - {Offset: 149, Length: 1}, - {Offset: 105, Length: 1}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - }, - stdVar: 1.2471347737158793, // actual variance: 1.25 - }, - { - name: "-50, -8, 0, 3, 8, 9, 100", - h: &histogram.Histogram{ - Count: 7, - ZeroCount: 1, - Sum: 62, - Schema: 3, - PositiveSpans: []histogram.Span{ - {Offset: 13, Length: 1}, - {Offset: 10, Length: 1}, - {Offset: 1, Length: 1}, - {Offset: 27, Length: 1}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 24, Length: 1}, - {Offset: 21, Length: 1}, - }, - NegativeBuckets: []int64{1, 0}, - }, - stdVar: 1844.4651144196398, // actual variance: 1738.4082 - }, - { - name: "-100000, -10000, -1000, -888, -888, -100, -50, -9, -8, -3", - h: &histogram.Histogram{ - Count: 10, - ZeroCount: 0, - Sum: -112946, - Schema: 0, - NegativeSpans: []histogram.Span{ - {Offset: 2, Length: 3}, - {Offset: 1, Length: 2}, - {Offset: 2, Length: 1}, - {Offset: 3, Length: 1}, - {Offset: 2, Length: 1}, - }, - NegativeBuckets: []int64{1, 0, 0, 0, 0, 2, -2, 0}, - }, - stdVar: 759352122.1939945, // actual variance: 882690990 - }, - { - name: "-10 x10", - h: &histogram.Histogram{ - Count: 10, - ZeroCount: 0, - Sum: -100, - Schema: 0, - NegativeSpans: []histogram.Span{ - {Offset: 4, Length: 1}, - }, - NegativeBuckets: []int64{10}, - }, - stdVar: 1.725830020304794, // actual variance: 0 - }, - { - name: "-50, -8, 0, 3, 8, 9, 100, NaN", - h: &histogram.Histogram{ - Count: 8, - ZeroCount: 1, - Sum: math.NaN(), - Schema: 3, - PositiveSpans: []histogram.Span{ - {Offset: 13, Length: 1}, - {Offset: 10, Length: 1}, - {Offset: 1, Length: 1}, - {Offset: 27, Length: 1}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 24, Length: 1}, - {Offset: 21, Length: 1}, - }, - NegativeBuckets: []int64{1, 0}, - }, - stdVar: math.NaN(), - }, - { - name: "-50, -8, 0, 3, 8, 9, 100, +Inf", - h: &histogram.Histogram{ - Count: 7, - ZeroCount: 1, - Sum: math.Inf(1), - Schema: 3, - PositiveSpans: []histogram.Span{ - {Offset: 13, Length: 1}, - {Offset: 10, Length: 1}, - {Offset: 1, Length: 1}, - {Offset: 27, Length: 1}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 24, Length: 1}, - {Offset: 21, Length: 1}, - }, - NegativeBuckets: []int64{1, 0}, - }, - stdVar: math.NaN(), - }, - } - for _, tc := range testCases { - for _, floatHisto := range []bool{true, false} { - t.Run(fmt.Sprintf("%s floatHistogram=%t", tc.name, floatHisto), func(t *testing.T) { - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := 
"sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - ts := int64(10 * time.Minute / time.Millisecond) - app := storage.Appender(context.Background()) - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, tc.h.ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, tc.h, nil) - } - require.NoError(t, err) - require.NoError(t, app.Commit()) - - queryString := fmt.Sprintf("histogram_stdvar(%s)", seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - require.InEpsilon(t, tc.stdVar, vector[0].F, 1e-12) - - queryString = fmt.Sprintf("histogram_stddev(%s)", seriesName) - qry, err = engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res = qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err = res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - require.InEpsilon(t, math.Sqrt(tc.stdVar), vector[0].F, 1e-12) - }) - } - } -} - -func TestNativeHistogram_HistogramQuantile(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - type subCase struct { - quantile string - value float64 - } - - cases := []struct { - text string - // Histogram to test. - h *histogram.Histogram - // Different quantiles to test for this histogram. - subCases []subCase - }{ - { - text: "all positive buckets with zero bucket", - h: &histogram.Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, - }, - subCases: []subCase{ - { - quantile: "1.0001", - value: math.Inf(1), - }, - { - quantile: "1", - value: 16, - }, - { - quantile: "0.99", - value: 15.759999999999998, - }, - { - quantile: "0.9", - value: 13.600000000000001, - }, - { - quantile: "0.6", - value: 4.799999999999997, - }, - { - quantile: "0.5", - value: 1.6666666666666665, - }, - { // Zero bucket. - quantile: "0.1", - value: 0.0006000000000000001, - }, - { - quantile: "0", - value: 0, - }, - { - quantile: "-1", - value: math.Inf(-1), - }, - }, - }, - { - text: "all negative buckets with zero bucket", - h: &histogram.Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - }, - subCases: []subCase{ - { - quantile: "1.0001", - value: math.Inf(1), - }, - { // Zero bucket. - quantile: "1", - value: 0, - }, - { // Zero bucket. - quantile: "0.99", - value: -6.000000000000048e-05, - }, - { // Zero bucket. - quantile: "0.9", - value: -0.0005999999999999996, - }, - { - quantile: "0.5", - value: -1.6666666666666667, - }, - { - quantile: "0.1", - value: -13.6, - }, - { - quantile: "0", - value: -16, - }, - { - quantile: "-1", - value: math.Inf(-1), - }, - }, - }, - { - text: "both positive and negative buckets with zero bucket", - h: &histogram.Histogram{ - Count: 24, - ZeroCount: 4, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. 
- Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - }, - subCases: []subCase{ - { - quantile: "1.0001", - value: math.Inf(1), - }, - { - quantile: "1", - value: 16, - }, - { - quantile: "0.99", - value: 15.519999999999996, - }, - { - quantile: "0.9", - value: 11.200000000000003, - }, - { - quantile: "0.7", - value: 1.2666666666666657, - }, - { // Zero bucket. - quantile: "0.55", - value: 0.0006000000000000005, - }, - { // Zero bucket. - quantile: "0.5", - value: 0, - }, - { // Zero bucket. - quantile: "0.45", - value: -0.0005999999999999996, - }, - { - quantile: "0.3", - value: -1.266666666666667, - }, - { - quantile: "0.1", - value: -11.2, - }, - { - quantile: "0.01", - value: -15.52, - }, - { - quantile: "0", - value: -16, - }, - { - quantile: "-1", - value: math.Inf(-1), - }, - }, - }, - } - - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - idx := int64(0) - for _, floatHisto := range []bool{true, false} { - for _, c := range cases { - t.Run(fmt.Sprintf("%s floatHistogram=%t", c.text, floatHisto), func(t *testing.T) { - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - ts := idx * int64(10*time.Minute/time.Millisecond) - app := storage.Appender(context.Background()) - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, c.h.ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, c.h, nil) - } - require.NoError(t, err) - require.NoError(t, app.Commit()) - - for j, sc := range c.subCases { - t.Run(fmt.Sprintf("%d %s", j, sc.quantile), func(t *testing.T) { - queryString := fmt.Sprintf("histogram_quantile(%s, %s)", sc.quantile, seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - require.True(t, almostEqual(sc.value, vector[0].F, defaultEpsilon)) - }) - } - idx++ - }) - } - } -} - -func TestNativeHistogram_HistogramFraction(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - type subCase struct { - lower, upper string - value float64 - } - - invariantCases := []subCase{ - { - lower: "42", - upper: "3.1415", - value: 0, - }, - { - lower: "0", - upper: "0", - value: 0, - }, - { - lower: "0.000001", - upper: "0.000001", - value: 0, - }, - { - lower: "42", - upper: "42", - value: 0, - }, - { - lower: "-3.1", - upper: "-3.1", - value: 0, - }, - { - lower: "3.1415", - upper: "NaN", - value: math.NaN(), - }, - { - lower: "NaN", - upper: "42", - value: math.NaN(), - }, - { - lower: "NaN", - upper: "NaN", - value: math.NaN(), - }, - { - lower: "-Inf", - upper: "+Inf", - value: 1, - }, - } - - cases := []struct { - text string - // Histogram to test. - h *histogram.Histogram - // Different ranges to test for this histogram. 
- subCases []subCase - }{ - { - text: "empty histogram", - h: &histogram.Histogram{}, - subCases: []subCase{ - { - lower: "3.1415", - upper: "42", - value: math.NaN(), - }, - }, - }, - { - text: "all positive buckets with zero bucket", - h: &histogram.Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, // Abs: 2, 3, 1, 4 - }, - subCases: append([]subCase{ - { - lower: "0", - upper: "+Inf", - value: 1, - }, - { - lower: "-Inf", - upper: "0", - value: 0, - }, - { - lower: "-0.001", - upper: "0", - value: 0, - }, - { - lower: "0", - upper: "0.001", - value: 2. / 12., - }, - { - lower: "0", - upper: "0.0005", - value: 1. / 12., - }, - { - lower: "0.001", - upper: "inf", - value: 10. / 12., - }, - { - lower: "-inf", - upper: "-0.001", - value: 0, - }, - { - lower: "1", - upper: "2", - value: 3. / 12., - }, - { - lower: "1.5", - upper: "2", - value: 1.5 / 12., - }, - { - lower: "1", - upper: "8", - value: 4. / 12., - }, - { - lower: "1", - upper: "6", - value: 3.5 / 12., - }, - { - lower: "1.5", - upper: "6", - value: 2. / 12., - }, - { - lower: "-2", - upper: "-1", - value: 0, - }, - { - lower: "-2", - upper: "-1.5", - value: 0, - }, - { - lower: "-8", - upper: "-1", - value: 0, - }, - { - lower: "-6", - upper: "-1", - value: 0, - }, - { - lower: "-6", - upper: "-1.5", - value: 0, - }, - }, invariantCases...), - }, - { - text: "all negative buckets with zero bucket", - h: &histogram.Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - }, - subCases: append([]subCase{ - { - lower: "0", - upper: "+Inf", - value: 0, - }, - { - lower: "-Inf", - upper: "0", - value: 1, - }, - { - lower: "-0.001", - upper: "0", - value: 2. / 12., - }, - { - lower: "0", - upper: "0.001", - value: 0, - }, - { - lower: "-0.0005", - upper: "0", - value: 1. / 12., - }, - { - lower: "0.001", - upper: "inf", - value: 0, - }, - { - lower: "-inf", - upper: "-0.001", - value: 10. / 12., - }, - { - lower: "1", - upper: "2", - value: 0, - }, - { - lower: "1.5", - upper: "2", - value: 0, - }, - { - lower: "1", - upper: "8", - value: 0, - }, - { - lower: "1", - upper: "6", - value: 0, - }, - { - lower: "1.5", - upper: "6", - value: 0, - }, - { - lower: "-2", - upper: "-1", - value: 3. / 12., - }, - { - lower: "-2", - upper: "-1.5", - value: 1.5 / 12., - }, - { - lower: "-8", - upper: "-1", - value: 4. / 12., - }, - { - lower: "-6", - upper: "-1", - value: 3.5 / 12., - }, - { - lower: "-6", - upper: "-1.5", - value: 2. / 12., - }, - }, invariantCases...), - }, - { - text: "both positive and negative buckets with zero bucket", - h: &histogram.Histogram{ - Count: 24, - ZeroCount: 4, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - }, - subCases: append([]subCase{ - { - lower: "0", - upper: "+Inf", - value: 0.5, - }, - { - lower: "-Inf", - upper: "0", - value: 0.5, - }, - { - lower: "-0.001", - upper: "0", - value: 2. / 24, - }, - { - lower: "0", - upper: "0.001", - value: 2. 
/ 24., - }, - { - lower: "-0.0005", - upper: "0.0005", - value: 2. / 24., - }, - { - lower: "0.001", - upper: "inf", - value: 10. / 24., - }, - { - lower: "-inf", - upper: "-0.001", - value: 10. / 24., - }, - { - lower: "1", - upper: "2", - value: 3. / 24., - }, - { - lower: "1.5", - upper: "2", - value: 1.5 / 24., - }, - { - lower: "1", - upper: "8", - value: 4. / 24., - }, - { - lower: "1", - upper: "6", - value: 3.5 / 24., - }, - { - lower: "1.5", - upper: "6", - value: 2. / 24., - }, - { - lower: "-2", - upper: "-1", - value: 3. / 24., - }, - { - lower: "-2", - upper: "-1.5", - value: 1.5 / 24., - }, - { - lower: "-8", - upper: "-1", - value: 4. / 24., - }, - { - lower: "-6", - upper: "-1", - value: 3.5 / 24., - }, - { - lower: "-6", - upper: "-1.5", - value: 2. / 24., - }, - }, invariantCases...), - }, - } - idx := int64(0) - for _, floatHisto := range []bool{true, false} { - for _, c := range cases { - t.Run(fmt.Sprintf("%s floatHistogram=%t", c.text, floatHisto), func(t *testing.T) { - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - ts := idx * int64(10*time.Minute/time.Millisecond) - app := storage.Appender(context.Background()) - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, c.h.ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, c.h, nil) - } - require.NoError(t, err) - require.NoError(t, app.Commit()) - - for j, sc := range c.subCases { - t.Run(fmt.Sprintf("%d %s %s", j, sc.lower, sc.upper), func(t *testing.T) { - queryString := fmt.Sprintf("histogram_fraction(%s, %s, %s)", sc.lower, sc.upper, seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - if math.IsNaN(sc.value) { - require.True(t, math.IsNaN(vector[0].F)) - return - } - require.Equal(t, sc.value, vector[0].F) - }) - } - idx++ - }) - } - } -} - func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { // TODO(codesome): Integrate histograms into the PromQL testing framework // and write more tests there. @@ -4444,7 +3349,7 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { _, err := app.Append(0, labels.FromStrings("__name__", "float_series", "idx", "0"), ts, 42) require.NoError(t, err) for idx1, h := range c.histograms { - lbls := labels.FromStrings("__name__", seriesName, "idx", fmt.Sprintf("%d", idx1)) + lbls := labels.FromStrings("__name__", seriesName, "idx", strconv.Itoa(idx1)) // Since we mutate h later, we need to create a copy here. var err error if floatHisto { @@ -4466,7 +3371,7 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { } require.NoError(t, app.Commit()) - queryAndCheck := func(queryString string, ts int64, exp Vector) { + queryAndCheck := func(queryString string, ts int64, exp promql.Vector) { qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) require.NoError(t, err) @@ -4490,7 +3395,7 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { // sum(). 
queryString := fmt.Sprintf("sum(%s)", seriesName) - queryAndCheck(queryString, ts, []Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, ts, []promql.Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) queryString = `sum({idx="0"})` var annos annotations.Annotations @@ -4502,26 +3407,26 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { for idx := 1; idx < len(c.histograms); idx++ { queryString += fmt.Sprintf(` + ignoring(idx) %s{idx="%d"}`, seriesName, idx) } - queryAndCheck(queryString, ts, []Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, ts, []promql.Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) // count(). queryString = fmt.Sprintf("count(%s)", seriesName) - queryAndCheck(queryString, ts, []Sample{{T: ts, F: 4, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, ts, []promql.Sample{{T: ts, F: 4, Metric: labels.EmptyLabels()}}) // avg(). queryString = fmt.Sprintf("avg(%s)", seriesName) - queryAndCheck(queryString, ts, []Sample{{T: ts, H: &c.expectedAvg, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, ts, []promql.Sample{{T: ts, H: &c.expectedAvg, Metric: labels.EmptyLabels()}}) offset := int64(len(c.histograms) - 1) newTs := ts + offset*int64(time.Minute/time.Millisecond) // sum_over_time(). queryString = fmt.Sprintf("sum_over_time(%s[%dm:1m])", seriesNameOverTime, offset) - queryAndCheck(queryString, newTs, []Sample{{T: newTs, H: &c.expected, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, newTs, []promql.Sample{{T: newTs, H: &c.expected, Metric: labels.EmptyLabels()}}) // avg_over_time(). queryString = fmt.Sprintf("avg_over_time(%s[%dm:1m])", seriesNameOverTime, offset) - queryAndCheck(queryString, newTs, []Sample{{T: newTs, H: &c.expectedAvg, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, newTs, []promql.Sample{{T: newTs, H: &c.expectedAvg, Metric: labels.EmptyLabels()}}) }) idx0++ } @@ -4716,7 +3621,7 @@ func TestNativeHistogram_SubOperator(t *testing.T) { ts := idx0 * int64(10*time.Minute/time.Millisecond) app := storage.Appender(context.Background()) for idx1, h := range c.histograms { - lbls := labels.FromStrings("__name__", seriesName, "idx", fmt.Sprintf("%d", idx1)) + lbls := labels.FromStrings("__name__", seriesName, "idx", strconv.Itoa(idx1)) // Since we mutate h later, we need to create a copy here. 
var err error if floatHisto { @@ -4728,7 +3633,7 @@ func TestNativeHistogram_SubOperator(t *testing.T) { } require.NoError(t, app.Commit()) - queryAndCheck := func(queryString string, exp Vector) { + queryAndCheck := func(queryString string, exp promql.Vector) { qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) require.NoError(t, err) @@ -4756,7 +3661,7 @@ func TestNativeHistogram_SubOperator(t *testing.T) { for idx := 1; idx < len(c.histograms); idx++ { queryString += fmt.Sprintf(` - ignoring(idx) %s{idx="%d"}`, seriesName, idx) } - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) }) } idx0++ @@ -4886,7 +3791,7 @@ func TestNativeHistogram_MulDivOperator(t *testing.T) { require.NoError(t, err) require.NoError(t, app.Commit()) - queryAndCheck := func(queryString string, exp Vector) { + queryAndCheck := func(queryString string, exp promql.Vector) { qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) require.NoError(t, err) @@ -4901,27 +3806,27 @@ func TestNativeHistogram_MulDivOperator(t *testing.T) { // histogram * scalar. queryString := fmt.Sprintf(`%s * %f`, seriesName, c.scalar) - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) // scalar * histogram. queryString = fmt.Sprintf(`%f * %s`, c.scalar, seriesName) - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) // histogram * float. queryString = fmt.Sprintf(`%s * %s`, seriesName, floatSeriesName) - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) // float * histogram. queryString = fmt.Sprintf(`%s * %s`, floatSeriesName, seriesName) - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}}) // histogram / scalar. queryString = fmt.Sprintf(`%s / %f`, seriesName, c.scalar) - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels()}}) // histogram / float. 
queryString = fmt.Sprintf(`%s / %s`, seriesName, floatSeriesName) - queryAndCheck(queryString, []Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels()}}) + queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels()}}) }) idx0++ } @@ -4991,20 +3896,17 @@ metric 0 1 2 for _, c := range cases { c := c t.Run(c.name, func(t *testing.T) { - engine := newTestEngine() - storage := LoadedStorage(t, load) + engine := promqltest.NewTestEngine(false, c.engineLookback, promqltest.DefaultMaxSamplesPerQuery) + storage := promqltest.LoadedStorage(t, load) t.Cleanup(func() { storage.Close() }) - if c.engineLookback != 0 { - engine.lookbackDelta = c.engineLookback - } - opts := NewPrometheusQueryOpts(false, c.queryLookback) + opts := promql.NewPrometheusQueryOpts(false, c.queryLookback) qry, err := engine.NewInstantQuery(context.Background(), storage, opts, query, c.ts) require.NoError(t, err) res := qry.Exec(context.Background()) require.NoError(t, res.Err) - vec, ok := res.Value.(Vector) + vec, ok := res.Value.(promql.Vector) require.True(t, ok) if c.expectSamples { require.NotEmpty(t, vec) @@ -5014,3 +3916,9 @@ metric 0 1 2 }) } } + +func makeInt64Pointer(val int64) *int64 { + valp := new(int64) + *valp = val + return valp +} diff --git a/promql/functions.go b/promql/functions.go index 2e15a1467..9b3be2287 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -948,15 +948,6 @@ func funcTimestamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHe return enh.Out, nil } -func kahanSum(samples []float64) float64 { - var sum, c float64 - - for _, v := range samples { - sum, c = kahanSumInc(v, sum, c) - } - return sum + c -} - func kahanSumInc(inc, sum, c float64) (newSum, newC float64) { t := sum + inc // Using Neumaier improvement, swap if next term larger than sum. diff --git a/promql/functions_test.go b/promql/functions_test.go index 6d5c3784e..aef59c837 100644 --- a/promql/functions_test.go +++ b/promql/functions_test.go @@ -11,11 +11,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -package promql +package promql_test import ( "context" - "math" "testing" "time" @@ -23,6 +22,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/util/teststorage" ) @@ -33,13 +33,13 @@ func TestDeriv(t *testing.T) { // so we test it by hand. storage := teststorage.New(t) defer storage.Close() - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10000, Timeout: 10 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) a := storage.Appender(context.Background()) @@ -70,19 +70,13 @@ func TestDeriv(t *testing.T) { func TestFunctionList(t *testing.T) { // Test that Functions and parser.Functions list the same functions. 
- for i := range FunctionCalls { + for i := range promql.FunctionCalls { _, ok := parser.Functions[i] require.True(t, ok, "function %s exists in promql package, but not in parser package", i) } for i := range parser.Functions { - _, ok := FunctionCalls[i] + _, ok := promql.FunctionCalls[i] require.True(t, ok, "function %s exists in parser package, but not in promql package", i) } } - -func TestKahanSum(t *testing.T) { - vals := []float64{1.0, math.Pow(10, 100), 1.0, -1 * math.Pow(10, 100)} - expected := 2.0 - require.Equal(t, expected, kahanSum(vals)) -} diff --git a/promql/parser/printer.go b/promql/parser/printer.go index ff171f215..f3bdefdeb 100644 --- a/promql/parser/printer.go +++ b/promql/parser/printer.go @@ -204,8 +204,8 @@ func (node *VectorSelector) String() string { labelStrings = make([]string, 0, len(node.LabelMatchers)-1) } for _, matcher := range node.LabelMatchers { - // Only include the __name__ label if its equality matching and matches the name. - if matcher.Name == labels.MetricName && matcher.Type == labels.MatchEqual && matcher.Value == node.Name { + // Only include the __name__ label if its equality matching and matches the name, but don't skip if it's an explicit empty name matcher. + if matcher.Name == labels.MetricName && matcher.Type == labels.MatchEqual && matcher.Value == node.Name && matcher.Value != "" { continue } labelStrings = append(labelStrings, matcher.String()) diff --git a/promql/parser/printer_test.go b/promql/parser/printer_test.go index a044b6969..f224d841d 100644 --- a/promql/parser/printer_test.go +++ b/promql/parser/printer_test.go @@ -135,6 +135,19 @@ func TestExprString(t *testing.T) { { in: `a[1m] @ end()`, }, + { + in: `{__name__="",a="x"}`, + }, + { + in: `{"a.b"="c"}`, + }, + { + in: `{"0"="1"}`, + }, + { + in: `{"_0"="1"}`, + out: `{_0="1"}`, + }, } for _, test := range inputs { @@ -216,6 +229,16 @@ func TestVectorSelector_String(t *testing.T) { }, expected: `{__name__="foobar"}`, }, + { + name: "empty name matcher", + vs: VectorSelector{ + LabelMatchers: []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, ""), + labels.MustNewMatcher(labels.MatchEqual, "a", "x"), + }, + }, + expected: `{__name__="",a="x"}`, + }, } { t.Run(tc.name, func(t *testing.T) { require.Equal(t, tc.expected, tc.vs.String()) diff --git a/promql/promql_test.go b/promql/promql_test.go index 05821b1c1..7bafc02e3 100644 --- a/promql/promql_test.go +++ b/promql/promql_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package promql +package promql_test import ( "context" @@ -22,37 +22,30 @@ import ( "github.com/stretchr/testify/require" "golang.org/x/sync/errgroup" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/util/teststorage" ) -func newTestEngine() *Engine { - return NewEngine(EngineOpts{ - Logger: nil, - Reg: nil, - MaxSamples: 10000, - Timeout: 100 * time.Second, - NoStepSubqueryIntervalFn: func(int64) int64 { return durationMilliseconds(1 * time.Minute) }, - EnableAtModifier: true, - EnableNegativeOffset: true, - EnablePerStepStats: true, - }) +func newTestEngine() *promql.Engine { + return promqltest.NewTestEngine(false, 0, promqltest.DefaultMaxSamplesPerQuery) } func TestEvaluations(t *testing.T) { - RunBuiltinTests(t, newTestEngine()) + promqltest.RunBuiltinTests(t, newTestEngine()) } // Run a lot of queries at the same time, to check for race conditions. 
func TestConcurrentRangeQueries(t *testing.T) { stor := teststorage.New(t) defer stor.Close() - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 50000000, Timeout: 100 * time.Second, } - engine := NewEngine(opts) + engine := promql.NewEngine(opts) const interval = 10000 // 10s interval. // A day of data plus 10k steps. diff --git a/promql/test.go b/promql/promqltest/test.go similarity index 89% rename from promql/test.go rename to promql/promqltest/test.go index 1cdfe8d31..1affd91f6 100644 --- a/promql/test.go +++ b/promql/promqltest/test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package promql +package promqltest import ( "context" @@ -19,7 +19,6 @@ import ( "errors" "fmt" "io/fs" - "math" "strconv" "strings" "testing" @@ -33,16 +32,16 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/almost" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" ) var ( - minNormal = math.Float64frombits(0x0010000000000000) // The smallest positive normal value of type float64. - patSpace = regexp.MustCompile("[\t ]+") patLoad = regexp.MustCompile(`^load\s+(.+?)$`) patEvalInstant = regexp.MustCompile(`^eval(?:_(fail|ordered))?\s+instant\s+(?:at\s+(.+?))?\s+(.+)$`) @@ -50,7 +49,8 @@ var ( ) const ( - defaultEpsilon = 0.000001 // Relative error allowed for sample values. + defaultEpsilon = 0.000001 // Relative error allowed for sample values. + DefaultMaxSamplesPerQuery = 10000 ) var testStartTime = time.Unix(0, 0).UTC() @@ -72,8 +72,22 @@ func LoadedStorage(t testutil.T, input string) *teststorage.TestStorage { return test.storage } +func NewTestEngine(enablePerStepStats bool, lookbackDelta time.Duration, maxSamples int) *promql.Engine { + return promql.NewEngine(promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: maxSamples, + Timeout: 100 * time.Second, + NoStepSubqueryIntervalFn: func(int64) int64 { return durationMilliseconds(1 * time.Minute) }, + EnableAtModifier: true, + EnableNegativeOffset: true, + EnablePerStepStats: enablePerStepStats, + LookbackDelta: lookbackDelta, + }) +} + // RunBuiltinTests runs an acceptance test suite against the provided engine. -func RunBuiltinTests(t *testing.T, engine QueryEngine) { +func RunBuiltinTests(t *testing.T, engine promql.QueryEngine) { t.Cleanup(func() { parser.EnableExperimentalFunctions = false }) parser.EnableExperimentalFunctions = true @@ -90,11 +104,11 @@ func RunBuiltinTests(t *testing.T, engine QueryEngine) { } // RunTest parses and runs the test against the provided engine. -func RunTest(t testutil.T, input string, engine QueryEngine) { +func RunTest(t testutil.T, input string, engine promql.QueryEngine) { require.NoError(t, runTest(t, input, engine)) } -func runTest(t testutil.T, input string, engine QueryEngine) error { +func runTest(t testutil.T, input string, engine promql.QueryEngine) error { test, err := newTest(t, input) // Why do this before checking err? 
newTest() can create the test storage and then return an error, @@ -368,7 +382,7 @@ func (*evalCmd) testCmd() {} type loadCmd struct { gap time.Duration metrics map[uint64]labels.Labels - defs map[uint64][]Sample + defs map[uint64][]promql.Sample exemplars map[uint64][]exemplar.Exemplar } @@ -376,7 +390,7 @@ func newLoadCmd(gap time.Duration) *loadCmd { return &loadCmd{ gap: gap, metrics: map[uint64]labels.Labels{}, - defs: map[uint64][]Sample{}, + defs: map[uint64][]promql.Sample{}, exemplars: map[uint64][]exemplar.Exemplar{}, } } @@ -389,11 +403,11 @@ func (cmd loadCmd) String() string { func (cmd *loadCmd) set(m labels.Labels, vals ...parser.SequenceValue) { h := m.Hash() - samples := make([]Sample, 0, len(vals)) + samples := make([]promql.Sample, 0, len(vals)) ts := testStartTime for _, v := range vals { if !v.Omitted { - samples = append(samples, Sample{ + samples = append(samples, promql.Sample{ T: ts.UnixNano() / int64(time.Millisecond/time.Nanosecond), F: v.Value, H: v.Histogram, @@ -419,7 +433,7 @@ func (cmd *loadCmd) append(a storage.Appender) error { return nil } -func appendSample(a storage.Appender, s Sample, m labels.Labels) error { +func appendSample(a storage.Appender, s promql.Sample, m labels.Labels) error { if s.H != nil { if _, err := a.AppendHistogram(0, m, s.T, nil, s.H); err != nil { return err @@ -503,7 +517,7 @@ func (ev *evalCmd) expectMetric(pos int, m labels.Labels, vals ...parser.Sequenc // compareResult compares the result value with the defined expectation. func (ev *evalCmd) compareResult(result parser.Value) error { switch val := result.(type) { - case Matrix: + case promql.Matrix: if ev.ordered { return fmt.Errorf("expected ordered result, but query returned a matrix") } @@ -521,8 +535,8 @@ func (ev *evalCmd) compareResult(result parser.Value) error { seen[hash] = true exp := ev.expected[hash] - var expectedFloats []FPoint - var expectedHistograms []HPoint + var expectedFloats []promql.FPoint + var expectedHistograms []promql.HPoint for i, e := range exp.vals { ts := ev.start.Add(time.Duration(i) * ev.step) @@ -534,9 +548,9 @@ func (ev *evalCmd) compareResult(result parser.Value) error { t := ts.UnixNano() / int64(time.Millisecond/time.Nanosecond) if e.Histogram != nil { - expectedHistograms = append(expectedHistograms, HPoint{T: t, H: e.Histogram}) + expectedHistograms = append(expectedHistograms, promql.HPoint{T: t, H: e.Histogram}) } else if !e.Omitted { - expectedFloats = append(expectedFloats, FPoint{T: t, F: e.Value}) + expectedFloats = append(expectedFloats, promql.FPoint{T: t, F: e.Value}) } } @@ -551,7 +565,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { return fmt.Errorf("expected float value at index %v for %s to have timestamp %v, but it had timestamp %v (result has %s)", i, ev.metrics[hash], expected.T, actual.T, formatSeriesResult(s)) } - if !almostEqual(actual.F, expected.F, defaultEpsilon) { + if !almost.Equal(actual.F, expected.F, defaultEpsilon) { return fmt.Errorf("expected float value at index %v (t=%v) for %s to be %v, but got %v (result has %s)", i, actual.T, ev.metrics[hash], expected.F, actual.F, formatSeriesResult(s)) } } @@ -575,7 +589,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { } } - case Vector: + case promql.Vector: seen := map[uint64]bool{} for pos, v := range val { fp := v.Metric.Hash() @@ -601,7 +615,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { if expH != nil && !expH.Compact(0).Equals(v.H) { return fmt.Errorf("expected %v for %s but got %s", 
HistogramTestExpression(expH), v.Metric, HistogramTestExpression(v.H)) } - if !almostEqual(exp0.Value, v.F, defaultEpsilon) { + if !almost.Equal(exp0.Value, v.F, defaultEpsilon) { return fmt.Errorf("expected %v for %s but got %v", exp0.Value, v.Metric, v.F) } @@ -613,7 +627,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { } } - case Scalar: + case promql.Scalar: if len(ev.expected) != 1 { return fmt.Errorf("expected vector result, but got scalar %s", val.String()) } @@ -621,7 +635,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { if exp0.Histogram != nil { return fmt.Errorf("expected Histogram %v but got scalar %s", exp0.Histogram.TestExpression(), val.String()) } - if !almostEqual(exp0.Value, val.V, defaultEpsilon) { + if !almost.Equal(exp0.Value, val.V, defaultEpsilon) { return fmt.Errorf("expected Scalar %v but got %v", val.V, exp0.Value) } @@ -631,7 +645,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { return nil } -func formatSeriesResult(s Series) string { +func formatSeriesResult(s promql.Series) string { floatPlural := "s" histogramPlural := "s" @@ -678,8 +692,7 @@ func atModifierTestCases(exprStr string, evalTime time.Time) ([]atModifierTestCa // If there is a subquery, then the selectors inside it don't get the @ timestamp. // If any selector already has the @ timestamp set, then it is untouched. parser.Inspect(expr, func(node parser.Node, path []parser.Node) error { - _, _, subqTs := subqueryTimes(path) - if subqTs != nil { + if hasAtModifier(path) { // There is a subquery with timestamp in the path, // hence don't change any timestamps further. return nil @@ -701,7 +714,7 @@ func atModifierTestCases(exprStr string, evalTime time.Time) ([]atModifierTestCa } case *parser.Call: - _, ok := AtModifierUnsafeFunctions[n.Func.Name] + _, ok := promql.AtModifierUnsafeFunctions[n.Func.Name] containsNonStepInvariant = containsNonStepInvariant || ok } return nil @@ -729,8 +742,19 @@ func atModifierTestCases(exprStr string, evalTime time.Time) ([]atModifierTestCa return testCases, nil } +func hasAtModifier(path []parser.Node) bool { + for _, node := range path { + if n, ok := node.(*parser.SubqueryExpr); ok { + if n.Timestamp != nil { + return true + } + } + } + return false +} + // exec processes a single step of the test. 
-func (t *test) exec(tc testCommand, engine QueryEngine) error { +func (t *test) exec(tc testCommand, engine promql.QueryEngine) error { switch cmd := tc.(type) { case *clearCmd: t.clear() @@ -755,7 +779,7 @@ func (t *test) exec(tc testCommand, engine QueryEngine) error { return nil } -func (t *test) execEval(cmd *evalCmd, engine QueryEngine) error { +func (t *test) execEval(cmd *evalCmd, engine promql.QueryEngine) error { if cmd.isRange { return t.execRangeEval(cmd, engine) } @@ -763,7 +787,7 @@ func (t *test) execEval(cmd *evalCmd, engine QueryEngine) error { return t.execInstantEval(cmd, engine) } -func (t *test) execRangeEval(cmd *evalCmd, engine QueryEngine) error { +func (t *test) execRangeEval(cmd *evalCmd, engine promql.QueryEngine) error { q, err := engine.NewRangeQuery(t.context, t.storage, nil, cmd.expr, cmd.start, cmd.end, cmd.step) if err != nil { return fmt.Errorf("error creating range query for %q (line %d): %w", cmd.expr, cmd.line, err) @@ -788,7 +812,7 @@ func (t *test) execRangeEval(cmd *evalCmd, engine QueryEngine) error { return nil } -func (t *test) execInstantEval(cmd *evalCmd, engine QueryEngine) error { +func (t *test) execInstantEval(cmd *evalCmd, engine promql.QueryEngine) error { queries, err := atModifierTestCases(cmd.expr, cmd.start) if err != nil { return err @@ -830,29 +854,29 @@ func (t *test) execInstantEval(cmd *evalCmd, engine QueryEngine) error { // Range queries are always sorted by labels, so skip this test case that expects results in a particular order. continue } - mat := rangeRes.Value.(Matrix) + mat := rangeRes.Value.(promql.Matrix) if err := assertMatrixSorted(mat); err != nil { return err } - vec := make(Vector, 0, len(mat)) + vec := make(promql.Vector, 0, len(mat)) for _, series := range mat { // We expect either Floats or Histograms. for _, point := range series.Floats { if point.T == timeMilliseconds(iq.evalTime) { - vec = append(vec, Sample{Metric: series.Metric, T: point.T, F: point.F}) + vec = append(vec, promql.Sample{Metric: series.Metric, T: point.T, F: point.F}) break } } for _, point := range series.Histograms { if point.T == timeMilliseconds(iq.evalTime) { - vec = append(vec, Sample{Metric: series.Metric, T: point.T, H: point.H}) + vec = append(vec, promql.Sample{Metric: series.Metric, T: point.T, H: point.H}) break } } } - if _, ok := res.Value.(Scalar); ok { - err = cmd.compareResult(Scalar{V: vec[0].F}) + if _, ok := res.Value.(promql.Scalar); ok { + err = cmd.compareResult(promql.Scalar{V: vec[0].F}) } else { err = cmd.compareResult(vec) } @@ -864,7 +888,7 @@ func (t *test) execInstantEval(cmd *evalCmd, engine QueryEngine) error { return nil } -func assertMatrixSorted(m Matrix) error { +func assertMatrixSorted(m promql.Matrix) error { if len(m) <= 1 { return nil } @@ -894,29 +918,6 @@ func (t *test) clear() { t.context, t.cancelCtx = context.WithCancel(context.Background()) } -// almostEqual returns true if a and b differ by less than their sum -// multiplied by epsilon. -func almostEqual(a, b, epsilon float64) bool { - // NaN has no equality but for testing we still want to know whether both values - // are NaN. - if math.IsNaN(a) && math.IsNaN(b) { - return true - } - - // Cf. 
http://floating-point-gui.de/errors/comparison/ - if a == b { - return true - } - - absSum := math.Abs(a) + math.Abs(b) - diff := math.Abs(a - b) - - if a == 0 || b == 0 || absSum < minNormal { - return diff < epsilon*minNormal - } - return diff/math.Min(absSum, math.MaxFloat64) < epsilon -} - func parseNumber(s string) (float64, error) { n, err := strconv.ParseInt(s, 0, 64) f := float64(n) @@ -937,7 +938,7 @@ type LazyLoader struct { storage storage.Storage SubqueryInterval time.Duration - queryEngine *Engine + queryEngine *promql.Engine context context.Context cancelCtx context.CancelFunc @@ -1004,7 +1005,7 @@ func (ll *LazyLoader) clear() error { return err } - opts := EngineOpts{ + opts := promql.EngineOpts{ Logger: nil, Reg: nil, MaxSamples: 10000, @@ -1014,7 +1015,7 @@ func (ll *LazyLoader) clear() error { EnableNegativeOffset: ll.opts.EnableNegativeOffset, } - ll.queryEngine = NewEngine(opts) + ll.queryEngine = promql.NewEngine(opts) ll.context, ll.cancelCtx = context.WithCancel(context.Background()) return nil } @@ -1048,7 +1049,7 @@ func (ll *LazyLoader) WithSamplesTill(ts time.Time, fn func(error)) { } // QueryEngine returns the LazyLoader's query engine. -func (ll *LazyLoader) QueryEngine() *Engine { +func (ll *LazyLoader) QueryEngine() *promql.Engine { return ll.queryEngine } @@ -1074,3 +1075,17 @@ func (ll *LazyLoader) Close() error { ll.cancelCtx() return ll.storage.Close() } + +func makeInt64Pointer(val int64) *int64 { + valp := new(int64) + *valp = val + return valp +} + +func timeMilliseconds(t time.Time) int64 { + return t.UnixNano() / int64(time.Millisecond/time.Nanosecond) +} + +func durationMilliseconds(d time.Duration) int64 { + return int64(d / (time.Millisecond / time.Nanosecond)) +} diff --git a/promql/test_test.go b/promql/promqltest/test_test.go similarity index 88% rename from promql/test_test.go rename to promql/promqltest/test_test.go index a5b24ac69..f6fe38707 100644 --- a/promql/test_test.go +++ b/promql/promqltest/test_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package promql +package promqltest import ( "math" @@ -21,14 +21,15 @@ import ( "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/tsdb/chunkenc" ) func TestLazyLoader_WithSamplesTill(t *testing.T) { type testCase struct { ts time.Time - series []Series // Each series is checked separately. Need not mention all series here. - checkOnlyError bool // If this is true, series is not checked. + series []promql.Series // Each series is checked separately. Need not mention all series here. + checkOnlyError bool // If this is true, series is not checked. 
} cases := []struct { @@ -44,33 +45,33 @@ func TestLazyLoader_WithSamplesTill(t *testing.T) { testCases: []testCase{ { ts: time.Unix(40, 0), - series: []Series{ + series: []promql.Series{ { Metric: labels.FromStrings("__name__", "metric1"), - Floats: []FPoint{ - {0, 1}, {10000, 2}, {20000, 3}, {30000, 4}, {40000, 5}, + Floats: []promql.FPoint{ + {T: 0, F: 1}, {T: 10000, F: 2}, {T: 20000, F: 3}, {T: 30000, F: 4}, {T: 40000, F: 5}, }, }, }, }, { ts: time.Unix(10, 0), - series: []Series{ + series: []promql.Series{ { Metric: labels.FromStrings("__name__", "metric1"), - Floats: []FPoint{ - {0, 1}, {10000, 2}, {20000, 3}, {30000, 4}, {40000, 5}, + Floats: []promql.FPoint{ + {T: 0, F: 1}, {T: 10000, F: 2}, {T: 20000, F: 3}, {T: 30000, F: 4}, {T: 40000, F: 5}, }, }, }, }, { ts: time.Unix(60, 0), - series: []Series{ + series: []promql.Series{ { Metric: labels.FromStrings("__name__", "metric1"), - Floats: []FPoint{ - {0, 1}, {10000, 2}, {20000, 3}, {30000, 4}, {40000, 5}, {50000, 6}, {60000, 7}, + Floats: []promql.FPoint{ + {T: 0, F: 1}, {T: 10000, F: 2}, {T: 20000, F: 3}, {T: 30000, F: 4}, {T: 40000, F: 5}, {T: 50000, F: 6}, {T: 60000, F: 7}, }, }, }, @@ -86,17 +87,17 @@ func TestLazyLoader_WithSamplesTill(t *testing.T) { testCases: []testCase{ { // Adds all samples of metric1. ts: time.Unix(70, 0), - series: []Series{ + series: []promql.Series{ { Metric: labels.FromStrings("__name__", "metric1"), - Floats: []FPoint{ - {0, 1}, {10000, 1}, {20000, 1}, {30000, 1}, {40000, 1}, {50000, 1}, + Floats: []promql.FPoint{ + {T: 0, F: 1}, {T: 10000, F: 1}, {T: 20000, F: 1}, {T: 30000, F: 1}, {T: 40000, F: 1}, {T: 50000, F: 1}, }, }, { Metric: labels.FromStrings("__name__", "metric2"), - Floats: []FPoint{ - {0, 1}, {10000, 2}, {20000, 3}, {30000, 4}, {40000, 5}, {50000, 6}, {60000, 7}, {70000, 8}, + Floats: []promql.FPoint{ + {T: 0, F: 1}, {T: 10000, F: 2}, {T: 20000, F: 3}, {T: 30000, F: 4}, {T: 40000, F: 5}, {T: 50000, F: 6}, {T: 60000, F: 7}, {T: 70000, F: 8}, }, }, }, @@ -140,13 +141,13 @@ func TestLazyLoader_WithSamplesTill(t *testing.T) { require.False(t, ss.Next(), "Expecting only 1 series") // Convert `storage.Series` to `promql.Series`. 
- got := Series{ + got := promql.Series{ Metric: storageSeries.Labels(), } it := storageSeries.Iterator(nil) for it.Next() == chunkenc.ValFloat { t, v := it.At() - got.Floats = append(got.Floats, FPoint{T: t, F: v}) + got.Floats = append(got.Floats, promql.FPoint{T: t, F: v}) } require.NoError(t, it.Err()) @@ -450,7 +451,7 @@ eval range from 0 to 5m step 5m testmetric for name, testCase := range testCases { t.Run(name, func(t *testing.T) { - err := runTest(t, testCase.input, newTestEngine()) + err := runTest(t, testCase.input, NewTestEngine(false, 0, DefaultMaxSamplesPerQuery)) if testCase.expectedError == "" { require.NoError(t, err) @@ -463,42 +464,42 @@ eval range from 0 to 5m step 5m testmetric func TestAssertMatrixSorted(t *testing.T) { testCases := map[string]struct { - matrix Matrix + matrix promql.Matrix expectedError string }{ "empty matrix": { - matrix: Matrix{}, + matrix: promql.Matrix{}, }, "matrix with one series": { - matrix: Matrix{ - Series{Metric: labels.FromStrings("the_label", "value_1")}, + matrix: promql.Matrix{ + promql.Series{Metric: labels.FromStrings("the_label", "value_1")}, }, }, "matrix with two series, series in sorted order": { - matrix: Matrix{ - Series{Metric: labels.FromStrings("the_label", "value_1")}, - Series{Metric: labels.FromStrings("the_label", "value_2")}, + matrix: promql.Matrix{ + promql.Series{Metric: labels.FromStrings("the_label", "value_1")}, + promql.Series{Metric: labels.FromStrings("the_label", "value_2")}, }, }, "matrix with two series, series in reverse order": { - matrix: Matrix{ - Series{Metric: labels.FromStrings("the_label", "value_2")}, - Series{Metric: labels.FromStrings("the_label", "value_1")}, + matrix: promql.Matrix{ + promql.Series{Metric: labels.FromStrings("the_label", "value_2")}, + promql.Series{Metric: labels.FromStrings("the_label", "value_1")}, }, expectedError: `matrix results should always be sorted by labels, but matrix is not sorted: series at index 1 with labels {the_label="value_1"} sorts before series at index 0 with labels {the_label="value_2"}`, }, "matrix with three series, series in sorted order": { - matrix: Matrix{ - Series{Metric: labels.FromStrings("the_label", "value_1")}, - Series{Metric: labels.FromStrings("the_label", "value_2")}, - Series{Metric: labels.FromStrings("the_label", "value_3")}, + matrix: promql.Matrix{ + promql.Series{Metric: labels.FromStrings("the_label", "value_1")}, + promql.Series{Metric: labels.FromStrings("the_label", "value_2")}, + promql.Series{Metric: labels.FromStrings("the_label", "value_3")}, }, }, "matrix with three series, series not in sorted order": { - matrix: Matrix{ - Series{Metric: labels.FromStrings("the_label", "value_1")}, - Series{Metric: labels.FromStrings("the_label", "value_3")}, - Series{Metric: labels.FromStrings("the_label", "value_2")}, + matrix: promql.Matrix{ + promql.Series{Metric: labels.FromStrings("the_label", "value_1")}, + promql.Series{Metric: labels.FromStrings("the_label", "value_3")}, + promql.Series{Metric: labels.FromStrings("the_label", "value_2")}, }, expectedError: `matrix results should always be sorted by labels, but matrix is not sorted: series at index 2 with labels {the_label="value_2"} sorts before series at index 1 with labels {the_label="value_3"}`, }, diff --git a/promql/testdata/aggregators.test b/promql/promqltest/testdata/aggregators.test similarity index 100% rename from promql/testdata/aggregators.test rename to promql/promqltest/testdata/aggregators.test diff --git a/promql/testdata/at_modifier.test 
b/promql/promqltest/testdata/at_modifier.test similarity index 100% rename from promql/testdata/at_modifier.test rename to promql/promqltest/testdata/at_modifier.test diff --git a/promql/testdata/collision.test b/promql/promqltest/testdata/collision.test similarity index 100% rename from promql/testdata/collision.test rename to promql/promqltest/testdata/collision.test diff --git a/promql/testdata/functions.test b/promql/promqltest/testdata/functions.test similarity index 99% rename from promql/testdata/functions.test rename to promql/promqltest/testdata/functions.test index e01c75a7f..2c198374a 100644 --- a/promql/testdata/functions.test +++ b/promql/promqltest/testdata/functions.test @@ -764,6 +764,14 @@ eval instant at 1m avg_over_time(metric10[1m]) eval instant at 1m sum_over_time(metric10[1m])/count_over_time(metric10[1m]) {} 0 +# Test if very big intermediate values cause loss of detail. +clear +load 10s + metric 1 1e100 1 -1e100 + +eval instant at 1m sum_over_time(metric[1m]) + {} 2 + # Tests for stddev_over_time and stdvar_over_time. clear load 10s diff --git a/promql/testdata/histograms.test b/promql/promqltest/testdata/histograms.test similarity index 100% rename from promql/testdata/histograms.test rename to promql/promqltest/testdata/histograms.test diff --git a/promql/testdata/literals.test b/promql/promqltest/testdata/literals.test similarity index 100% rename from promql/testdata/literals.test rename to promql/promqltest/testdata/literals.test diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test new file mode 100644 index 000000000..37818e4f8 --- /dev/null +++ b/promql/promqltest/testdata/native_histograms.test @@ -0,0 +1,716 @@ +# Minimal valid case: an empty histogram. +load 5m + empty_histogram {{}} + +eval instant at 5m empty_histogram + {__name__="empty_histogram"} {{}} + +eval instant at 5m histogram_count(empty_histogram) + {} 0 + +eval instant at 5m histogram_sum(empty_histogram) + {} 0 + +eval instant at 5m histogram_avg(empty_histogram) + {} NaN + +eval instant at 5m histogram_fraction(-Inf, +Inf, empty_histogram) + {} NaN + +eval instant at 5m histogram_fraction(0, 8, empty_histogram) + {} NaN + + + +# buckets:[1 2 1] means 1 observation in the 1st bucket, 2 observations in the 2nd and 1 observation in the 3rd (total 4). +load 5m + single_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}} + +# histogram_count extracts the count property from the histogram. +eval instant at 5m histogram_count(single_histogram) + {} 4 + +# histogram_sum extracts the sum property from the histogram. +eval instant at 5m histogram_sum(single_histogram) + {} 5 + +# histogram_avg calculates the average from sum and count properties. +eval instant at 5m histogram_avg(single_histogram) + {} 1.25 + +# We expect half of the values to fall in the range 1 < x <= 2. +eval instant at 5m histogram_fraction(1, 2, single_histogram) + {} 0.5 + +# We expect all values to fall in the range 0 < x <= 8. +eval instant at 5m histogram_fraction(0, 8, single_histogram) + {} 1 + +# Median is 1.5 due to linear estimation of the midpoint of the middle bucket, whose values are within range 1 < x <= 2. +eval instant at 5m histogram_quantile(0.5, single_histogram) + {} 1.5 + + + +# Repeat the same histogram 10 times. 
+load 5m + multi_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}}x10 + +eval instant at 5m histogram_count(multi_histogram) + {} 4 + +eval instant at 5m histogram_sum(multi_histogram) + {} 5 + +eval instant at 5m histogram_avg(multi_histogram) + {} 1.25 + +eval instant at 5m histogram_fraction(1, 2, multi_histogram) + {} 0.5 + +eval instant at 5m histogram_quantile(0.5, multi_histogram) + {} 1.5 + + +# Each entry should look the same as the first. +eval instant at 50m histogram_count(multi_histogram) + {} 4 + +eval instant at 50m histogram_sum(multi_histogram) + {} 5 + +eval instant at 50m histogram_avg(multi_histogram) + {} 1.25 + +eval instant at 50m histogram_fraction(1, 2, multi_histogram) + {} 0.5 + +eval instant at 50m histogram_quantile(0.5, multi_histogram) + {} 1.5 + + + +# Accumulate the histogram addition for 10 iterations, offset is a bucket position where offset:0 is always the bucket +# with an upper limit of 1 and offset:1 is the bucket which follows to the right. Negative offsets represent bucket +# positions for upper limits <1 (tending toward zero), where offset:-1 is the bucket to the left of offset:0. +load 5m + incr_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:1 buckets:[1] offset:1}}x10 + +eval instant at 5m histogram_count(incr_histogram) + {} 5 + +eval instant at 5m histogram_sum(incr_histogram) + {} 6 + +eval instant at 5m histogram_avg(incr_histogram) + {} 1.2 + +# We expect 3/5ths of the values to fall in the range 1 < x <= 2. +eval instant at 5m histogram_fraction(1, 2, incr_histogram) + {} 0.6 + +eval instant at 5m histogram_quantile(0.5, incr_histogram) + {} 1.5 + + +eval instant at 50m incr_histogram + {__name__="incr_histogram"} {{count:14 sum:24 buckets:[1 12 1]}} + +eval instant at 50m histogram_count(incr_histogram) + {} 14 + +eval instant at 50m histogram_sum(incr_histogram) + {} 24 + +eval instant at 50m histogram_avg(incr_histogram) + {} 1.7142857142857142 + +# We expect 12/14ths of the values to fall in the range 1 < x <= 2. +eval instant at 50m histogram_fraction(1, 2, incr_histogram) + {} 0.8571428571428571 + +eval instant at 50m histogram_quantile(0.5, incr_histogram) + {} 1.5 + +# Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum. +eval instant at 50m rate(incr_histogram[5m]) + {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}} + +# Calculate the 50th percentile of observations over the last 10m. +eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m])) + {} 1.5 + + + +# Schema represents the histogram resolution, different schema have compatible bucket boundaries, e.g.: +# 0: 1 2 4 8 16 32 64 (higher resolution) +# -1: 1 4 16 64 (lower resolution) +# +# Histograms can be merged as long as the histogram to the right is same resolution or higher. +load 5m + low_res_histogram {{schema:-1 sum:4 count:1 buckets:[1] offset:1}}+{{schema:0 sum:4 count:4 buckets:[2 2] offset:1}}x1 + +eval instant at 5m low_res_histogram + {__name__="low_res_histogram"} {{schema:-1 count:5 sum:8 offset:1 buckets:[5]}} + +eval instant at 5m histogram_count(low_res_histogram) + {} 5 + +eval instant at 5m histogram_sum(low_res_histogram) + {} 8 + +eval instant at 5m histogram_avg(low_res_histogram) + {} 1.6 + +# We expect all values to fall into the lower-resolution bucket with the range 1 < x <= 4. 
+eval instant at 5m histogram_fraction(1, 4, low_res_histogram)
+  {} 1
+
+
+
+# z_bucket:1 means there is one observation in the zero bucket, and z_bucket_w:0.5 means the zero bucket covers the
+# range -0.5 to +0.5. Sum and count are expected to represent all observations in the histogram, including those in
+# the zero bucket.
+load 5m
+  single_zero_histogram {{schema:0 z_bucket:1 z_bucket_w:0.5 sum:0.25 count:1}}
+
+eval instant at 5m histogram_count(single_zero_histogram)
+  {} 1
+
+eval instant at 5m histogram_sum(single_zero_histogram)
+  {} 0.25
+
+eval instant at 5m histogram_avg(single_zero_histogram)
+  {} 0.25
+
+# When only the zero bucket is populated, or there are negative buckets, the distribution is assumed to be symmetric
+# around zero, i.e. there are an equal number of positive and negative observations. Therefore the entire distribution
+# must lie within the full range of the zero bucket, in this case: -0.5 < x <= +0.5.
+eval instant at 5m histogram_fraction(-0.5, 0.5, single_zero_histogram)
+  {} 1
+
+# Half of the observations are estimated to be zero, as this is the midpoint between -0.5 and +0.5.
+eval instant at 5m histogram_quantile(0.5, single_zero_histogram)
+  {} 0
+
+
+
+# Let's turn single_histogram upside-down.
+load 5m
+  negative_histogram {{schema:0 sum:-5 count:4 n_buckets:[1 2 1]}}
+
+eval instant at 5m histogram_count(negative_histogram)
+  {} 4
+
+eval instant at 5m histogram_sum(negative_histogram)
+  {} -5
+
+eval instant at 5m histogram_avg(negative_histogram)
+  {} -1.25
+
+# We expect half of the values to fall in the range -2 < x <= -1.
+eval instant at 5m histogram_fraction(-2, -1, negative_histogram)
+  {} 0.5
+
+eval instant at 5m histogram_quantile(0.5, negative_histogram)
+  {} -1.5
+
+
+
+# Two histogram samples.
+load 5m
+  two_samples_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}} {{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}}
+
+# We expect to see the newest sample.
+eval instant at 10m histogram_count(two_samples_histogram)
+  {} 4
+
+eval instant at 10m histogram_sum(two_samples_histogram)
+  {} -4
+
+eval instant at 10m histogram_avg(two_samples_histogram)
+  {} -1
+
+eval instant at 10m histogram_fraction(-2, -1, two_samples_histogram)
+  {} 0.5
+
+eval instant at 10m histogram_quantile(0.5, two_samples_histogram)
+  {} -1.5
+
+
+
+# Add two histograms with negated data.
+load 5m
+  balanced_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}}x1
+
+eval instant at 5m histogram_count(balanced_histogram)
+  {} 8
+
+eval instant at 5m histogram_sum(balanced_histogram)
+  {} 0
+
+eval instant at 5m histogram_avg(balanced_histogram)
+  {} 0
+
+eval instant at 5m histogram_fraction(0, 4, balanced_histogram)
+  {} 0.5
+
+# If the quantile happens to be located in a span of empty buckets, the value actually returned is the lower bound of
+# the first populated bucket after that span of empty buckets.
+eval instant at 5m histogram_quantile(0.5, balanced_histogram)
+  {} 0.5
+
+# Add histograms to test for a sum(last_over_time) regression.
+load 5m
+  incr_sum_histogram{number="1"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:1 count:1 buckets:[1]}}x10
+  incr_sum_histogram{number="2"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:2 count:1 buckets:[1]}}x10
+
+eval instant at 50m histogram_sum(sum(incr_sum_histogram))
+  {} 30
+
+eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m])))
+  {} 30
+
+# Apply the rate function to a histogram.
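+# (Worked out: each 15s step adds count:9 and sum:18.4, so the per-second rate is
+# 9/15 = 0.6 for count, 18.4/15 ≈ 1.2267 for sum, and 1/15 ≈ 0.0667 for the zero bucket
+# and for each incremented regular bucket, matching the expected output below.)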
+load 15s + histogram_rate {{schema:1 count:12 sum:18.4 z_bucket:2 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[1 2 0 1 1]}}+{{schema:1 count:9 sum:18.4 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 0 1 1] n_buckets:[1 1 0 1 1]}}x100 + +eval instant at 5m rate(histogram_rate[45s]) + {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}} + +eval range from 5m to 5m30s step 30s rate(histogram_rate[45s]) + {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}}x1 + +# Apply count and sum function to histogram. +load 10m + histogram_count_sum_2 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_count(histogram_count_sum_2) + {} 24 + +eval instant at 10m histogram_sum(histogram_count_sum_2) + {} 100 + +# Apply stddev and stdvar function to histogram with {1, 2, 3, 4} (low res). +load 10m + histogram_stddev_stdvar_1 {{schema:2 count:4 sum:10 buckets:[1 0 0 0 1 0 0 1 1]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_1) + {} 1.0787993180043811 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_1) + {} 1.163807968526718 + +# Apply stddev and stdvar function to histogram with {1, 1, 1, 1} (high res). +load 10m + histogram_stddev_stdvar_2 {{schema:8 count:10 sum:10 buckets:[1 2 3 4]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_2) + {} 0.0048960313898237465 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_2) + {} 2.3971123370139447e-05 + +# Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9}. +load 10m + histogram_stddev_stdvar_3 {{schema:3 count:7 sum:62 z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_3) + {} 42.947236400258 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_3) + {} 1844.4651144196398 + +# Apply stddev and stdvar function to histogram with {-100000, -10000, -1000, -888, -888, -100, -50, -9, -8, -3}. +load 10m + histogram_stddev_stdvar_4 {{schema:0 count:10 sum:-112946 z_bucket:0 n_buckets:[0 0 1 1 1 0 1 1 0 0 3 0 0 0 1 0 0 1]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_4) + {} 27556.344499842 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_4) + {} 759352122.1939945 + +# Apply stddev and stdvar function to histogram with {-10x10}. +load 10m + histogram_stddev_stdvar_5 {{schema:0 count:10 sum:-100 z_bucket:0 n_buckets:[0 0 0 0 10]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_5) + {} 1.3137084989848 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_5) + {} 1.725830020304794 + +# Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9, NaN}. 
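+# (With sum:NaN the estimated mean sum/count is NaN, and the stddev and stdvar
+# estimates below propagate that NaN.)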
+load 10m + histogram_stddev_stdvar_6 {{schema:3 count:7 sum:NaN z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_6) + {} NaN + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_6) + {} NaN + +# Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9, Inf}. +load 10m + histogram_stddev_stdvar_7 {{schema:3 count:7 sum:Inf z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_7) + {} NaN + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_7) + {} NaN + +# Apply quantile function to histogram with all positive buckets with zero bucket. +load 10m + histogram_quantile_1 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_quantile(1.001, histogram_quantile_1) + {} Inf + +eval instant at 10m histogram_quantile(1, histogram_quantile_1) + {} 16 + +eval instant at 10m histogram_quantile(0.99, histogram_quantile_1) + {} 15.759999999999998 + +eval instant at 10m histogram_quantile(0.9, histogram_quantile_1) + {} 13.600000000000001 + +eval instant at 10m histogram_quantile(0.6, histogram_quantile_1) + {} 4.799999999999997 + +eval instant at 10m histogram_quantile(0.5, histogram_quantile_1) + {} 1.6666666666666665 + +eval instant at 10m histogram_quantile(0.1, histogram_quantile_1) + {} 0.0006000000000000001 + +eval instant at 10m histogram_quantile(0, histogram_quantile_1) + {} 0 + +eval instant at 10m histogram_quantile(-1, histogram_quantile_1) + {} -Inf + +# Apply quantile function to histogram with all negative buckets with zero bucket. +load 10m + histogram_quantile_2 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_quantile(1.001, histogram_quantile_2) + {} Inf + +eval instant at 10m histogram_quantile(1, histogram_quantile_2) + {} 0 + +eval instant at 10m histogram_quantile(0.99, histogram_quantile_2) + {} -6.000000000000048e-05 + +eval instant at 10m histogram_quantile(0.9, histogram_quantile_2) + {} -0.0005999999999999996 + +eval instant at 10m histogram_quantile(0.5, histogram_quantile_2) + {} -1.6666666666666667 + +eval instant at 10m histogram_quantile(0.1, histogram_quantile_2) + {} -13.6 + +eval instant at 10m histogram_quantile(0, histogram_quantile_2) + {} -16 + +eval instant at 10m histogram_quantile(-1, histogram_quantile_2) + {} -Inf + +# Apply quantile function to histogram with both positive and negative buckets with zero bucket. 
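+# (The positive and negative bucket counts mirror each other: 10 observations on each side
+# plus 4 in the zero bucket. The median is therefore 0, and quantiles symmetric around 0.5,
+# e.g. 0.9 and 0.1, return values of equal magnitude and opposite sign.)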
+load 10m + histogram_quantile_3 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_quantile(1.001, histogram_quantile_3) + {} Inf + +eval instant at 10m histogram_quantile(1, histogram_quantile_3) + {} 16 + +eval instant at 10m histogram_quantile(0.99, histogram_quantile_3) + {} 15.519999999999996 + +eval instant at 10m histogram_quantile(0.9, histogram_quantile_3) + {} 11.200000000000003 + +eval instant at 10m histogram_quantile(0.7, histogram_quantile_3) + {} 1.2666666666666657 + +eval instant at 10m histogram_quantile(0.55, histogram_quantile_3) + {} 0.0006000000000000005 + +eval instant at 10m histogram_quantile(0.5, histogram_quantile_3) + {} 0 + +eval instant at 10m histogram_quantile(0.45, histogram_quantile_3) + {} -0.0005999999999999996 + +eval instant at 10m histogram_quantile(0.3, histogram_quantile_3) + {} -1.266666666666667 + +eval instant at 10m histogram_quantile(0.1, histogram_quantile_3) + {} -11.2 + +eval instant at 10m histogram_quantile(0.01, histogram_quantile_3) + {} -15.52 + +eval instant at 10m histogram_quantile(0, histogram_quantile_3) + {} -16 + +eval instant at 10m histogram_quantile(-1, histogram_quantile_3) + {} -Inf + +# Apply fraction function to empty histogram. +load 10m + histogram_fraction_1 {{}}x1 + +eval instant at 10m histogram_fraction(3.1415, 42, histogram_fraction_1) + {} NaN + +# Apply fraction function to histogram with positive and zero buckets. +load 10m + histogram_fraction_2 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_fraction(0, +Inf, histogram_fraction_2) + {} 1 + +eval instant at 10m histogram_fraction(-Inf, 0, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_2) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(0, 0.0005, histogram_fraction_2) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2) + {} 0.8333333333333334 + +eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(1, 2, histogram_fraction_2) + {} 0.25 + +eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_2) + {} 0.125 + +eval instant at 10m histogram_fraction(1, 8, histogram_fraction_2) + {} 0.3333333333333333 + +eval instant at 10m histogram_fraction(1, 6, histogram_fraction_2) + {} 0.2916666666666667 + +eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_2) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(0, 0, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(0.000001, 0.000001, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(42, 42, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-3.1, -3.1, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(3.1415, NaN, 
histogram_fraction_2) + {} NaN + +eval instant at 10m histogram_fraction(NaN, 42, histogram_fraction_2) + {} NaN + +eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_2) + {} NaN + +eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_2) + {} 1 + +# Apply fraction function to histogram with negative and zero buckets. +load 10m + histogram_fraction_3 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_fraction(0, +Inf, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-Inf, 0, histogram_fraction_3) + {} 1 + +eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_3) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-0.0005, 0, histogram_fraction_3) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_3) + {} 0.8333333333333334 + +eval instant at 10m histogram_fraction(1, 2, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(1, 8, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(1, 6, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_3) + {} 0.25 + +eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_3) + {} 0.125 + +eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_3) + {} 0.3333333333333333 + +eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_3) + {} 0.2916666666666667 + +eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_3) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(0, 0, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(0.000001, 0.000001, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(42, 42, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-3.1, -3.1, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(3.1415, NaN, histogram_fraction_3) + {} NaN + +eval instant at 10m histogram_fraction(NaN, 42, histogram_fraction_3) + {} NaN + +eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_3) + {} NaN + +eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_3) + {} 1 + +# Apply fraction function to histogram with both positive, negative and zero buckets. 
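+# (Same symmetric shape as above: 10 observations on each side plus 4 in the zero bucket,
+# so histogram_fraction(0, +Inf) and histogram_fraction(-Inf, 0) each return 0.5, and every
+# positive-range fraction below matches its mirrored negative-range counterpart.)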
+load 10m + histogram_fraction_4 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_fraction(0, +Inf, histogram_fraction_4) + {} 0.5 + +eval instant at 10m histogram_fraction(-Inf, 0, histogram_fraction_4) + {} 0.5 + +eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(-0.0005, 0.0005, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_4) + {} 0.4166666666666667 + +eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_4) + {} 0.4166666666666667 + +eval instant at 10m histogram_fraction(1, 2, histogram_fraction_4) + {} 0.125 + +eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_4) + {} 0.0625 + +eval instant at 10m histogram_fraction(1, 8, histogram_fraction_4) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(1, 6, histogram_fraction_4) + {} 0.14583333333333334 + +eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_4) + {} 0.125 + +eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_4) + {} 0.0625 + +eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_4) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_4) + {} 0.14583333333333334 + +eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(0, 0, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(0.000001, 0.000001, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(42, 42, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(-3.1, -3.1, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(3.1415, NaN, histogram_fraction_4) + {} NaN + +eval instant at 10m histogram_fraction(NaN, 42, histogram_fraction_4) + {} NaN + +eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_4) + {} NaN + +eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_4) + {} 1 diff --git a/promql/testdata/operators.test b/promql/promqltest/testdata/operators.test similarity index 100% rename from promql/testdata/operators.test rename to promql/promqltest/testdata/operators.test diff --git a/promql/testdata/selectors.test b/promql/promqltest/testdata/selectors.test similarity index 100% rename from promql/testdata/selectors.test rename to promql/promqltest/testdata/selectors.test diff --git a/promql/testdata/staleness.test b/promql/promqltest/testdata/staleness.test similarity index 100% rename from promql/testdata/staleness.test rename to promql/promqltest/testdata/staleness.test diff --git a/promql/testdata/subquery.test b/promql/promqltest/testdata/subquery.test similarity index 100% rename from promql/testdata/subquery.test rename to promql/promqltest/testdata/subquery.test diff --git a/promql/testdata/trig_functions.test b/promql/promqltest/testdata/trig_functions.test similarity index 100% rename from promql/testdata/trig_functions.test rename to promql/promqltest/testdata/trig_functions.test diff --git a/promql/quantile.go 
b/promql/quantile.go index 6a225afb1..d4bc9ee6e 100644 --- a/promql/quantile.go +++ b/promql/quantile.go @@ -20,6 +20,7 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/util/almost" ) // smallDeltaTolerance is the threshold for relative deltas between classic @@ -397,7 +398,7 @@ func ensureMonotonicAndIgnoreSmallDeltas(buckets buckets, tolerance float64) (bo // No correction needed if the counts are identical between buckets. continue } - if almostEqual(prev, curr, tolerance) { + if almost.Equal(prev, curr, tolerance) { // Silently correct numerically insignificant differences from floating // point precision errors, regardless of direction. // Do not update the 'prev' value as we are ignoring the difference. diff --git a/promql/query_logger.go b/promql/query_logger.go index fa4e1fb07..7e06ebb97 100644 --- a/promql/query_logger.go +++ b/promql/query_logger.go @@ -16,6 +16,8 @@ package promql import ( "context" "encoding/json" + "errors" + "fmt" "io" "os" "path/filepath" @@ -36,6 +38,8 @@ type ActiveQueryTracker struct { maxConcurrent int } +var _ io.Closer = &ActiveQueryTracker{} + type Entry struct { Query string `json:"query"` Timestamp int64 `json:"timestamp_sec"` @@ -83,6 +87,23 @@ func logUnfinishedQueries(filename string, filesize int, logger log.Logger) { } } +type mmapedFile struct { + f io.Closer + m mmap.MMap +} + +func (f *mmapedFile) Close() error { + err := f.m.Unmap() + if err != nil { + err = fmt.Errorf("mmapedFile: unmapping: %w", err) + } + if fErr := f.f.Close(); fErr != nil { + return errors.Join(fmt.Errorf("close mmapedFile.f: %w", fErr), err) + } + + return err +} + func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io.Closer, error) { file, err := os.OpenFile(filename, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o666) if err != nil { @@ -96,17 +117,19 @@ func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io err = file.Truncate(int64(filesize)) if err != nil { + file.Close() level.Error(logger).Log("msg", "Error setting filesize.", "filesize", filesize, "err", err) return nil, nil, err } fileAsBytes, err := mmap.Map(file, mmap.RDWR, 0) if err != nil { + file.Close() level.Error(logger).Log("msg", "Failed to mmap", "file", filename, "Attempted size", filesize, "err", err) return nil, nil, err } - return fileAsBytes, file, err + return fileAsBytes, &mmapedFile{f: file, m: fileAsBytes}, err } func NewActiveQueryTracker(localStoragePath string, maxConcurrent int, logger log.Logger) *ActiveQueryTracker { @@ -202,9 +225,13 @@ func (tracker ActiveQueryTracker) Insert(ctx context.Context, query string) (int } } -func (tracker *ActiveQueryTracker) Close() { +// Close closes tracker. 
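+// It unmaps and closes the underlying query log file via the tracker's closer.
+// Calling Close on a nil tracker, or on one without a closer, is a no-op and returns nil.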
+func (tracker *ActiveQueryTracker) Close() error { if tracker == nil || tracker.closer == nil { - return + return nil } - tracker.closer.Close() + if err := tracker.closer.Close(); err != nil { + return fmt.Errorf("close ActiveQueryTracker.closer: %w", err) + } + return nil } diff --git a/promql/query_logger_test.go b/promql/query_logger_test.go index 4135753fd..7bd93781e 100644 --- a/promql/query_logger_test.go +++ b/promql/query_logger_test.go @@ -16,6 +16,7 @@ package promql import ( "context" "os" + "path/filepath" "testing" "github.com/grafana/regexp" @@ -104,29 +105,26 @@ func TestIndexReuse(t *testing.T) { } func TestMMapFile(t *testing.T) { - file, err := os.CreateTemp("", "mmapedFile") + dir := t.TempDir() + fpath := filepath.Join(dir, "mmapedFile") + const data = "ab" + + fileAsBytes, closer, err := getMMapedFile(fpath, 2, nil) require.NoError(t, err) + copy(fileAsBytes, data) + require.NoError(t, closer.Close()) - filename := file.Name() - defer os.Remove(filename) - - fileAsBytes, closer, err := getMMapedFile(filename, 2, nil) - if err != nil { - t.Cleanup(func() { closer.Close() }) - } - - require.NoError(t, err) - copy(fileAsBytes, "ab") - - f, err := os.Open(filename) + f, err := os.Open(fpath) require.NoError(t, err) + t.Cleanup(func() { + _ = f.Close() + }) bytes := make([]byte, 4) n, err := f.Read(bytes) - require.Equal(t, 2, n) require.NoError(t, err, "Unexpected error while reading file.") - - require.Equal(t, fileAsBytes, bytes[:2], "Mmap failed") + require.Equal(t, 2, n) + require.Equal(t, []byte(data), bytes[:2], "Mmap failed") } func TestParseBrokenJSON(t *testing.T) { diff --git a/promql/testdata/native_histograms.test b/promql/testdata/native_histograms.test deleted file mode 100644 index 1da68a385..000000000 --- a/promql/testdata/native_histograms.test +++ /dev/null @@ -1,271 +0,0 @@ -# Minimal valid case: an empty histogram. -load 5m - empty_histogram {{}} - -eval instant at 5m empty_histogram - {__name__="empty_histogram"} {{}} - -eval instant at 5m histogram_count(empty_histogram) - {} 0 - -eval instant at 5m histogram_sum(empty_histogram) - {} 0 - -eval instant at 5m histogram_avg(empty_histogram) - {} NaN - -eval instant at 5m histogram_fraction(-Inf, +Inf, empty_histogram) - {} NaN - -eval instant at 5m histogram_fraction(0, 8, empty_histogram) - {} NaN - - - -# buckets:[1 2 1] means 1 observation in the 1st bucket, 2 observations in the 2nd and 1 observation in the 3rd (total 4). -load 5m - single_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}} - -# histogram_count extracts the count property from the histogram. -eval instant at 5m histogram_count(single_histogram) - {} 4 - -# histogram_sum extracts the sum property from the histogram. -eval instant at 5m histogram_sum(single_histogram) - {} 5 - -# histogram_avg calculates the average from sum and count properties. -eval instant at 5m histogram_avg(single_histogram) - {} 1.25 - -# We expect half of the values to fall in the range 1 < x <= 2. -eval instant at 5m histogram_fraction(1, 2, single_histogram) - {} 0.5 - -# We expect all values to fall in the range 0 < x <= 8. -eval instant at 5m histogram_fraction(0, 8, single_histogram) - {} 1 - -# Median is 1.5 due to linear estimation of the midpoint of the middle bucket, whose values are within range 1 < x <= 2. -eval instant at 5m histogram_quantile(0.5, single_histogram) - {} 1.5 - - - -# Repeat the same histogram 10 times. 
-load 5m - multi_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}}x10 - -eval instant at 5m histogram_count(multi_histogram) - {} 4 - -eval instant at 5m histogram_sum(multi_histogram) - {} 5 - -eval instant at 5m histogram_avg(multi_histogram) - {} 1.25 - -eval instant at 5m histogram_fraction(1, 2, multi_histogram) - {} 0.5 - -eval instant at 5m histogram_quantile(0.5, multi_histogram) - {} 1.5 - - -# Each entry should look the same as the first. -eval instant at 50m histogram_count(multi_histogram) - {} 4 - -eval instant at 50m histogram_sum(multi_histogram) - {} 5 - -eval instant at 50m histogram_avg(multi_histogram) - {} 1.25 - -eval instant at 50m histogram_fraction(1, 2, multi_histogram) - {} 0.5 - -eval instant at 50m histogram_quantile(0.5, multi_histogram) - {} 1.5 - - - -# Accumulate the histogram addition for 10 iterations, offset is a bucket position where offset:0 is always the bucket -# with an upper limit of 1 and offset:1 is the bucket which follows to the right. Negative offsets represent bucket -# positions for upper limits <1 (tending toward zero), where offset:-1 is the bucket to the left of offset:0. -load 5m - incr_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{sum:2 count:1 buckets:[1] offset:1}}x10 - -eval instant at 5m histogram_count(incr_histogram) - {} 5 - -eval instant at 5m histogram_sum(incr_histogram) - {} 6 - -eval instant at 5m histogram_avg(incr_histogram) - {} 1.2 - -# We expect 3/5ths of the values to fall in the range 1 < x <= 2. -eval instant at 5m histogram_fraction(1, 2, incr_histogram) - {} 0.6 - -eval instant at 5m histogram_quantile(0.5, incr_histogram) - {} 1.5 - - -eval instant at 50m incr_histogram - {__name__="incr_histogram"} {{count:14 sum:24 buckets:[1 12 1]}} - -eval instant at 50m histogram_count(incr_histogram) - {} 14 - -eval instant at 50m histogram_sum(incr_histogram) - {} 24 - -eval instant at 50m histogram_avg(incr_histogram) - {} 1.7142857142857142 - -# We expect 12/14ths of the values to fall in the range 1 < x <= 2. -eval instant at 50m histogram_fraction(1, 2, incr_histogram) - {} 0.8571428571428571 - -eval instant at 50m histogram_quantile(0.5, incr_histogram) - {} 1.5 - -# Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum. -eval instant at 50m rate(incr_histogram[5m]) - {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}} - -# Calculate the 50th percentile of observations over the last 10m. -eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m])) - {} 1.5 - - - -# Schema represents the histogram resolution, different schema have compatible bucket boundaries, e.g.: -# 0: 1 2 4 8 16 32 64 (higher resolution) -# -1: 1 4 16 64 (lower resolution) -# -# Histograms can be merged as long as the histogram to the right is same resolution or higher. -load 5m - low_res_histogram {{schema:-1 sum:4 count:1 buckets:[1] offset:1}}+{{schema:0 sum:4 count:4 buckets:[2 2] offset:1}}x1 - -eval instant at 5m low_res_histogram - {__name__="low_res_histogram"} {{schema:-1 count:5 sum:8 offset:1 buckets:[5]}} - -eval instant at 5m histogram_count(low_res_histogram) - {} 5 - -eval instant at 5m histogram_sum(low_res_histogram) - {} 8 - -eval instant at 5m histogram_avg(low_res_histogram) - {} 1.6 - -# We expect all values to fall into the lower-resolution bucket with the range 1 < x <= 4. 
-eval instant at 5m histogram_fraction(1, 4, low_res_histogram) - {} 1 - - - -# z_bucket:1 means there is one observation in the zero bucket and z_bucket_w:0.5 means the zero bucket has the range -# 0 < x <= 0.5. Sum and count are expected to represent all observations in the histogram, including those in the zero bucket. -load 5m - single_zero_histogram {{schema:0 z_bucket:1 z_bucket_w:0.5 sum:0.25 count:1}} - -eval instant at 5m histogram_count(single_zero_histogram) - {} 1 - -eval instant at 5m histogram_sum(single_zero_histogram) - {} 0.25 - -eval instant at 5m histogram_avg(single_zero_histogram) - {} 0.25 - -# When only the zero bucket is populated, or there are negative buckets, the distribution is assumed to be equally -# distributed around zero; i.e. that there are an equal number of positive and negative observations. Therefore the -# entire distribution must lie within the full range of the zero bucket, in this case: -0.5 < x <= +0.5. -eval instant at 5m histogram_fraction(-0.5, 0.5, single_zero_histogram) - {} 1 - -# Half of the observations are estimated to be zero, as this is the midpoint between -0.5 and +0.5. -eval instant at 5m histogram_quantile(0.5, single_zero_histogram) - {} 0 - - - -# Let's turn single_histogram upside-down. -load 5m - negative_histogram {{schema:0 sum:-5 count:4 n_buckets:[1 2 1]}} - -eval instant at 5m histogram_count(negative_histogram) - {} 4 - -eval instant at 5m histogram_sum(negative_histogram) - {} -5 - -eval instant at 5m histogram_avg(negative_histogram) - {} -1.25 - -# We expect half of the values to fall in the range -2 < x <= -1. -eval instant at 5m histogram_fraction(-2, -1, negative_histogram) - {} 0.5 - -eval instant at 5m histogram_quantile(0.5, negative_histogram) - {} -1.5 - - - -# Two histogram samples. -load 5m - two_samples_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}} {{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}} - -# We expect to see the newest sample. -eval instant at 10m histogram_count(two_samples_histogram) - {} 4 - -eval instant at 10m histogram_sum(two_samples_histogram) - {} -4 - -eval instant at 10m histogram_avg(two_samples_histogram) - {} -1 - -eval instant at 10m histogram_fraction(-2, -1, two_samples_histogram) - {} 0.5 - -eval instant at 10m histogram_quantile(0.5, two_samples_histogram) - {} -1.5 - - - -# Add two histograms with negated data. -load 5m - balanced_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}}+{{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}}x1 - -eval instant at 5m histogram_count(balanced_histogram) - {} 8 - -eval instant at 5m histogram_sum(balanced_histogram) - {} 0 - -eval instant at 5m histogram_avg(balanced_histogram) - {} 0 - -eval instant at 5m histogram_fraction(0, 4, balanced_histogram) - {} 0.5 - -# If the quantile happens to be located in a span of empty buckets, the actually returned value is the lower bound of -# the first populated bucket after the span of empty buckets. 
-eval instant at 5m histogram_quantile(0.5, balanced_histogram) - {} 0.5 - -# Add histogram to test sum(last_over_time) regression -load 5m - incr_sum_histogram{number="1"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:1 count:1 buckets:[1]}}x10 - incr_sum_histogram{number="2"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:2 count:1 buckets:[1]}}x10 - -eval instant at 50m histogram_sum(sum(incr_sum_histogram)) - {} 30 - -eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m]))) - {} 30 diff --git a/promql/value_test.go b/promql/value_test.go index c93ba8213..0017b41e2 100644 --- a/promql/value_test.go +++ b/promql/value_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package promql +package promql_test import ( "testing" @@ -19,39 +19,40 @@ import ( "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql" ) func TestVector_ContainsSameLabelset(t *testing.T) { for name, tc := range map[string]struct { - vector Vector + vector promql.Vector expected bool }{ "empty vector": { - vector: Vector{}, + vector: promql.Vector{}, expected: false, }, "vector with one series": { - vector: Vector{ + vector: promql.Vector{ {Metric: labels.FromStrings("lbl", "a")}, }, expected: false, }, "vector with two different series": { - vector: Vector{ + vector: promql.Vector{ {Metric: labels.FromStrings("lbl", "a")}, {Metric: labels.FromStrings("lbl", "b")}, }, expected: false, }, "vector with two equal series": { - vector: Vector{ + vector: promql.Vector{ {Metric: labels.FromStrings("lbl", "a")}, {Metric: labels.FromStrings("lbl", "a")}, }, expected: true, }, "vector with three series, two equal": { - vector: Vector{ + vector: promql.Vector{ {Metric: labels.FromStrings("lbl", "a")}, {Metric: labels.FromStrings("lbl", "b")}, {Metric: labels.FromStrings("lbl", "a")}, @@ -67,35 +68,35 @@ func TestVector_ContainsSameLabelset(t *testing.T) { func TestMatrix_ContainsSameLabelset(t *testing.T) { for name, tc := range map[string]struct { - matrix Matrix + matrix promql.Matrix expected bool }{ "empty matrix": { - matrix: Matrix{}, + matrix: promql.Matrix{}, expected: false, }, "matrix with one series": { - matrix: Matrix{ + matrix: promql.Matrix{ {Metric: labels.FromStrings("lbl", "a")}, }, expected: false, }, "matrix with two different series": { - matrix: Matrix{ + matrix: promql.Matrix{ {Metric: labels.FromStrings("lbl", "a")}, {Metric: labels.FromStrings("lbl", "b")}, }, expected: false, }, "matrix with two equal series": { - matrix: Matrix{ + matrix: promql.Matrix{ {Metric: labels.FromStrings("lbl", "a")}, {Metric: labels.FromStrings("lbl", "a")}, }, expected: true, }, "matrix with three series, two equal": { - matrix: Matrix{ + matrix: promql.Matrix{ {Metric: labels.FromStrings("lbl", "a")}, {Metric: labels.FromStrings("lbl", "b")}, {Metric: labels.FromStrings("lbl", "a")}, diff --git a/rules/alerting.go b/rules/alerting.go index 50c67fa2d..2dc0917dc 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -246,13 +246,16 @@ func (r *AlertingRule) sample(alert *Alert, ts time.Time) promql.Sample { return s } -// forStateSample returns the sample for ALERTS_FOR_STATE. +// forStateSample returns a promql.Sample with the rule labels, `ALERTS_FOR_STATE` as the metric name and the rule name as the `alertname` label. +// Optionally, if an alert is provided it'll copy the labels of the alert into the sample labels. 
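+// Passing a nil alert therefore yields labels that match every ALERTS_FOR_STATE series
+// of this rule, which is what QueryForStateSeries relies on.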
func (r *AlertingRule) forStateSample(alert *Alert, ts time.Time, v float64) promql.Sample { lb := labels.NewBuilder(r.labels) - alert.Labels.Range(func(l labels.Label) { - lb.Set(l.Name, l.Value) - }) + if alert != nil { + alert.Labels.Range(func(l labels.Label) { + lb.Set(l.Name, l.Value) + }) + } lb.Set(labels.MetricName, alertForStateMetricName) lb.Set(labels.AlertName, r.name) @@ -265,9 +268,11 @@ func (r *AlertingRule) forStateSample(alert *Alert, ts time.Time, v float64) pro return s } -// QueryforStateSeries returns the series for ALERTS_FOR_STATE. -func (r *AlertingRule) QueryforStateSeries(ctx context.Context, alert *Alert, q storage.Querier) (storage.Series, error) { - smpl := r.forStateSample(alert, time.Now(), 0) +// QueryForStateSeries returns the series for ALERTS_FOR_STATE of the alert rule. +func (r *AlertingRule) QueryForStateSeries(ctx context.Context, q storage.Querier) (storage.SeriesSet, error) { + // We use a sample to ease the building of matchers. + // Don't provide an alert as we want matchers that match all series for the alert rule. + smpl := r.forStateSample(nil, time.Now(), 0) var matchers []*labels.Matcher smpl.Metric.Range(func(l labels.Label) { mt, err := labels.NewMatcher(labels.MatchEqual, l.Name, l.Value) @@ -276,20 +281,9 @@ func (r *AlertingRule) QueryforStateSeries(ctx context.Context, alert *Alert, q } matchers = append(matchers, mt) }) + sset := q.Select(ctx, false, nil, matchers...) - - var s storage.Series - for sset.Next() { - // Query assures that smpl.Metric is included in sset.At().Labels(), - // hence just checking the length would act like equality. - // (This is faster than calling labels.Compare again as we already have some info). - if sset.At().Labels().Len() == len(matchers) { - s = sset.At() - break - } - } - - return s, sset.Err() + return sset, sset.Err() } // SetEvaluationDuration updates evaluationDuration to the duration it took to evaluate the rule on its last evaluation. @@ -344,10 +338,9 @@ const resolvedRetention = 15 * time.Minute // Eval evaluates the rule expression and then creates pending alerts and fires // or removes previously pending alerts accordingly. -func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, externalURL *url.URL, limit int) (promql.Vector, error) { +func (r *AlertingRule) Eval(ctx context.Context, queryOffset time.Duration, ts time.Time, query QueryFunc, externalURL *url.URL, limit int) (promql.Vector, error) { ctx = NewOriginContext(ctx, NewRuleDetail(r)) - - res, err := query(ctx, r.vector.String(), ts) + res, err := query(ctx, r.vector.String(), ts.Add(-queryOffset)) if err != nil { return nil, err } @@ -457,8 +450,17 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, } } - // If the alert was previously firing, keep it around for a given - // retention time so it is reported as resolved to the AlertManager. + // If the alert is resolved (was firing but is now inactive) keep it for + // at least the retention period. This is important for a number of reasons: + // + // 1. It allows for Prometheus to be more resilient to network issues that + // would otherwise prevent a resolved alert from being reported as resolved + // to Alertmanager. + // + // 2. It helps reduce the chance of resolved notifications being lost if + // Alertmanager crashes or restarts between receiving the resolved alert + // from Prometheus and sending the resolved notification. This tends to + // occur for routes with large Group intervals. 
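+		// Pending alerts that stopped firing are dropped immediately; resolved alerts are
+		// kept until resolvedRetention (15 minutes, see the constant above) has elapsed.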
if a.State == StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > resolvedRetention) { delete(r.active, fp) } @@ -481,8 +483,8 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, } if r.restored.Load() { - vec = append(vec, r.sample(a, ts)) - vec = append(vec, r.forStateSample(a, ts, float64(a.ActiveAt.Unix()))) + vec = append(vec, r.sample(a, ts.Add(-queryOffset))) + vec = append(vec, r.forStateSample(a, ts.Add(-queryOffset), float64(a.ActiveAt.Unix()))) } } @@ -548,6 +550,13 @@ func (r *AlertingRule) ForEachActiveAlert(f func(*Alert)) { } } +func (r *AlertingRule) ActiveAlertsCount() int { + r.activeMtx.Lock() + defer r.activeMtx.Unlock() + + return len(r.active) +} + func (r *AlertingRule) sendAlerts(ctx context.Context, ts time.Time, resendDelay, interval time.Duration, notifyFunc NotifyFunc) { alerts := []*Alert{} r.ForEachActiveAlert(func(alert *Alert) { diff --git a/rules/alerting_test.go b/rules/alerting_test.go index ddfe345ef..5ebd049f6 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -30,6 +30,7 @@ import ( "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" @@ -122,7 +123,7 @@ func TestAlertingRuleTemplateWithHistogram(t *testing.T) { ) evalTime := time.Now() - res, err := rule.Eval(context.TODO(), evalTime, q, nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, q, nil, 0) require.NoError(t, err) require.Len(t, res, 2) @@ -148,7 +149,7 @@ func TestAlertingRuleTemplateWithHistogram(t *testing.T) { } func TestAlertingRuleLabelsUpdate(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 stale `) @@ -229,7 +230,7 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) { t.Logf("case %d", i) evalTime := baseTime.Add(time.Duration(i) * time.Minute) result[0].T = timestamp.FromTime(evalTime) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. @@ -246,13 +247,13 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) { testutil.RequireEqual(t, result, filteredRes) } evalTime := baseTime.Add(time.Duration(len(results)) * time.Minute) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) require.Empty(t, res) } func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 `) @@ -314,7 +315,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. 
res, err := ruleWithoutExternalLabels.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -328,7 +329,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { } res, err = ruleWithExternalLabels.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -345,7 +346,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { } func TestAlertingRuleExternalURLInTemplate(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 `) @@ -407,7 +408,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. res, err := ruleWithoutExternalURL.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -421,7 +422,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { } res, err = ruleWithExternalURL.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -438,7 +439,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { } func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 `) @@ -476,7 +477,7 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. 
res, err := rule.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -492,7 +493,7 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { } func TestAlertingRuleQueryInTemplate(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 70 85 70 70 `) @@ -543,7 +544,7 @@ instance: {{ $v.Labels.instance }}, value: {{ printf "%.0f" $v.Value }}; close(getDoneCh) }() _, err = ruleWithQueryInTemplate.Eval( - context.TODO(), evalTime, slowQueryFunc, nil, 0, + context.TODO(), 0, evalTime, slowQueryFunc, nil, 0, ) require.NoError(t, err) } @@ -595,13 +596,13 @@ func TestAlertingRuleDuplicate(t *testing.T) { "", true, log.NewNopLogger(), ) - _, err := rule.Eval(ctx, now, EngineQueryFunc(engine, storage), nil, 0) + _, err := rule.Eval(ctx, 0, now, EngineQueryFunc(engine, storage), nil, 0) require.Error(t, err) require.EqualError(t, err, "vector contains metrics with the same labelset after applying alert labels") } func TestAlertingRuleLimit(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m metric{label="1"} 1 metric{label="2"} 1 @@ -643,7 +644,7 @@ func TestAlertingRuleLimit(t *testing.T) { evalTime := time.Unix(0, 0) for _, test := range tests { - switch _, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); { + switch _, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); { case err != nil: require.EqualError(t, err, test.err) case test.err != "": @@ -710,19 +711,17 @@ func TestQueryForStateSeries(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, ) - alert := &Alert{ - State: 0, - Labels: labels.EmptyLabels(), - Annotations: labels.EmptyLabels(), - Value: 0, - ActiveAt: time.Time{}, - FiredAt: time.Time{}, - ResolvedAt: time.Time{}, - LastSentAt: time.Time{}, - ValidUntil: time.Time{}, - } + sample := rule.forStateSample(nil, time.Time{}, 0) - series, err := rule.QueryforStateSeries(context.Background(), alert, querier) + seriesSet, err := rule.QueryForStateSeries(context.Background(), querier) + + var series storage.Series + for seriesSet.Next() { + if seriesSet.At().Labels().Len() == sample.Metric.Len() { + series = seriesSet.At() + break + } + } require.Equal(t, tst.expectedSeries, series) require.Equal(t, tst.expectedError, err) @@ -785,7 +784,7 @@ func TestSendAlertsDontAffectActiveAlerts(t *testing.T) { } func TestKeepFiringFor(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 75 85 70 70 10x5 `) @@ -872,7 +871,7 @@ func TestKeepFiringFor(t *testing.T) { t.Logf("case %d", i) evalTime := baseTime.Add(time.Duration(i) * time.Minute) result[0].T = timestamp.FromTime(evalTime) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. 
@@ -889,13 +888,13 @@ func TestKeepFiringFor(t *testing.T) { testutil.RequireEqual(t, result, filteredRes) } evalTime := baseTime.Add(time.Duration(len(results)) * time.Minute) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) require.Empty(t, res) } func TestPendingAndKeepFiringFor(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m http_requests{job="app-server", instance="0"} 75 10x10 `) @@ -926,7 +925,7 @@ func TestPendingAndKeepFiringFor(t *testing.T) { baseTime := time.Unix(0, 0) result.T = timestamp.FromTime(baseTime) - res, err := rule.Eval(context.TODO(), baseTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, baseTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) require.Len(t, res, 2) @@ -941,7 +940,7 @@ func TestPendingAndKeepFiringFor(t *testing.T) { } evalTime := baseTime.Add(time.Minute) - res, err = rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err = rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) require.Empty(t, res) } @@ -975,7 +974,7 @@ func TestAlertingEvalWithOrigin(t *testing.T) { true, log.NewNopLogger(), ) - _, err = rule.Eval(ctx, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) { + _, err = rule.Eval(ctx, 0, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) { detail = FromOriginContext(ctx) return nil, nil }, nil, 0) @@ -1025,3 +1024,24 @@ func TestAlertingRule_SetNoDependencyRules(t *testing.T) { rule.SetNoDependencyRules(true) require.True(t, rule.NoDependencyRules()) } + +func TestAlertingRule_ActiveAlertsCount(t *testing.T) { + rule := NewAlertingRule( + "TestRule", + nil, + time.Minute, + 0, + labels.FromStrings("severity", "critical"), + labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, + ) + + require.Equal(t, 0, rule.ActiveAlertsCount()) + + // Set an active alert. + lbls := labels.FromStrings("a1", "1") + h := lbls.Hash() + al := &Alert{State: StateFiring, Labels: lbls, ActiveAt: time.Now()} + rule.active[h] = al + + require.Equal(t, 1, rule.ActiveAlertsCount()) +} diff --git a/rules/group.go b/rules/group.go index c268d2df7..9ae89789d 100644 --- a/rules/group.go +++ b/rules/group.go @@ -47,6 +47,7 @@ type Group struct { name string file string interval time.Duration + queryOffset *time.Duration limit int rules []Rule seriesInPreviousEval []map[string]labels.Labels // One per Rule. 
@@ -90,6 +91,7 @@ type GroupOptions struct { Rules []Rule ShouldRestore bool Opts *ManagerOptions + QueryOffset *time.Duration done chan struct{} EvalIterationFunc GroupEvalIterationFunc } @@ -126,6 +128,7 @@ func NewGroup(o GroupOptions) *Group { name: o.Name, file: o.File, interval: o.Interval, + queryOffset: o.QueryOffset, limit: o.Limit, rules: o.Rules, shouldRestore: o.ShouldRestore, @@ -230,7 +233,11 @@ func (g *Group) run(ctx context.Context) { g.evalIterationFunc(ctx, g, evalTimestamp) } - g.RestoreForState(time.Now()) + restoreStartTime := time.Now() + g.RestoreForState(restoreStartTime) + totalRestoreTimeSeconds := time.Since(restoreStartTime).Seconds() + g.metrics.GroupLastRestoreDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(totalRestoreTimeSeconds) + level.Debug(g.logger).Log("msg", "'for' state restoration completed", "duration_seconds", totalRestoreTimeSeconds) g.shouldRestore = false } @@ -439,6 +446,8 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { wg sync.WaitGroup ) + ruleQueryOffset := g.QueryOffset() + for i, rule := range g.rules { select { case <-g.done: @@ -469,7 +478,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() - vector, err := rule.Eval(ctx, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit()) + vector, err := rule.Eval(ctx, ruleQueryOffset, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit()) if err != nil { rule.SetHealth(HealthBad) rule.SetLastError(err) @@ -558,7 +567,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { for metric, lset := range g.seriesInPreviousEval[i] { if _, ok := seriesReturned[metric]; !ok { // Series no longer exposed, mark it stale. - _, err = app.Append(0, lset, timestamp.FromTime(ts), math.Float64frombits(value.StaleNaN)) + _, err = app.Append(0, lset, timestamp.FromTime(ts.Add(-ruleQueryOffset)), math.Float64frombits(value.StaleNaN)) unwrappedErr := errors.Unwrap(err) if unwrappedErr == nil { unwrappedErr = err @@ -597,14 +606,27 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { g.cleanupStaleSeries(ctx, ts) } +func (g *Group) QueryOffset() time.Duration { + if g.queryOffset != nil { + return *g.queryOffset + } + + if g.opts.DefaultRuleQueryOffset != nil { + return g.opts.DefaultRuleQueryOffset() + } + + return time.Duration(0) +} + func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) { if len(g.staleSeries) == 0 { return } app := g.opts.Appendable.Appender(ctx) + queryOffset := g.QueryOffset() for _, s := range g.staleSeries { // Rule that produced series no longer configured, mark it stale. - _, err := app.Append(0, s, timestamp.FromTime(ts), math.Float64frombits(value.StaleNaN)) + _, err := app.Append(0, s, timestamp.FromTime(ts.Add(-queryOffset)), math.Float64frombits(value.StaleNaN)) unwrappedErr := errors.Unwrap(err) if unwrappedErr == nil { unwrappedErr = err @@ -660,25 +682,40 @@ func (g *Group) RestoreForState(ts time.Time) { continue } + sset, err := alertRule.QueryForStateSeries(g.opts.Context, q) + if err != nil { + level.Error(g.logger).Log( + "msg", "Failed to restore 'for' state", + labels.AlertName, alertRule.Name(), + "stage", "Select", + "err", err, + ) + // Even if we failed to query the `ALERT_FOR_STATE` series, we currently have no way to retry the restore process. + // So the best we can do is mark the rule as restored and let it eventually fire. 
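+			// Note that we continue with the next rule instead of aborting the whole restore
+			// pass, so a single failing Select does not block restoration for other rules.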
+ alertRule.SetRestored(true) + continue + } + + // While not technically the same number of series we expect, it's as good of an approximation as any. + seriesByLabels := make(map[string]storage.Series, alertRule.ActiveAlertsCount()) + for sset.Next() { + seriesByLabels[sset.At().Labels().DropMetricName().String()] = sset.At() + } + + // No results for this alert rule. + if len(seriesByLabels) == 0 { + level.Debug(g.logger).Log("msg", "No series found to restore the 'for' state of the alert rule", labels.AlertName, alertRule.Name()) + alertRule.SetRestored(true) + continue + } + alertRule.ForEachActiveAlert(func(a *Alert) { var s storage.Series - s, err := alertRule.QueryforStateSeries(g.opts.Context, a, q) - if err != nil { - // Querier Warnings are ignored. We do not care unless we have an error. - level.Error(g.logger).Log( - "msg", "Failed to restore 'for' state", - labels.AlertName, alertRule.Name(), - "stage", "Select", - "err", err, - ) + s, ok := seriesByLabels[a.Labels.String()] + if !ok { return } - - if s == nil { - return - } - // Series found for the 'for' state. var t int64 var v float64 @@ -779,17 +816,18 @@ const namespace = "prometheus" // Metrics for rule evaluation. type Metrics struct { - EvalDuration prometheus.Summary - IterationDuration prometheus.Summary - IterationsMissed *prometheus.CounterVec - IterationsScheduled *prometheus.CounterVec - EvalTotal *prometheus.CounterVec - EvalFailures *prometheus.CounterVec - GroupInterval *prometheus.GaugeVec - GroupLastEvalTime *prometheus.GaugeVec - GroupLastDuration *prometheus.GaugeVec - GroupRules *prometheus.GaugeVec - GroupSamples *prometheus.GaugeVec + EvalDuration prometheus.Summary + IterationDuration prometheus.Summary + IterationsMissed *prometheus.CounterVec + IterationsScheduled *prometheus.CounterVec + EvalTotal *prometheus.CounterVec + EvalFailures *prometheus.CounterVec + GroupInterval *prometheus.GaugeVec + GroupLastEvalTime *prometheus.GaugeVec + GroupLastDuration *prometheus.GaugeVec + GroupLastRestoreDuration *prometheus.GaugeVec + GroupRules *prometheus.GaugeVec + GroupSamples *prometheus.GaugeVec } // NewGroupMetrics creates a new instance of Metrics and registers it with the provided registerer, @@ -865,6 +903,14 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { }, []string{"rule_group"}, ), + GroupLastRestoreDuration: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Name: "rule_group_last_restore_duration_seconds", + Help: "The duration of the last alert rules alerts restoration using the `ALERTS_FOR_STATE` series.", + }, + []string{"rule_group"}, + ), GroupRules: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: namespace, @@ -894,6 +940,7 @@ func NewGroupMetrics(reg prometheus.Registerer) *Metrics { m.GroupInterval, m.GroupLastEvalTime, m.GroupLastDuration, + m.GroupLastRestoreDuration, m.GroupRules, m.GroupSamples, ) diff --git a/rules/manager.go b/rules/manager.go index 165dca144..063189e0a 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -116,6 +116,7 @@ type ManagerOptions struct { ForGracePeriod time.Duration ResendDelay time.Duration GroupLoader GroupLoader + DefaultRuleQueryOffset func() time.Duration MaxConcurrentEvals int64 ConcurrentEvalsEnabled bool RuleConcurrencyController RuleConcurrencyController @@ -336,6 +337,7 @@ func (m *Manager) LoadGroups( Rules: rules, ShouldRestore: shouldRestore, Opts: m.opts, + QueryOffset: (*time.Duration)(rg.QueryOffset), done: m.done, EvalIterationFunc: groupEvalIterationFunc, }) diff --git 
a/rules/manager_test.go b/rules/manager_test.go index 50ab6b861..11d1282bd 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -16,9 +16,12 @@ package rules import ( "context" "fmt" + "io/fs" "math" "os" + "path" "sort" + "strconv" "sync" "testing" "time" @@ -38,6 +41,7 @@ import ( "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/tsdbutil" @@ -50,7 +54,7 @@ func TestMain(m *testing.M) { } func TestAlertingRule(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 5m http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140 @@ -160,7 +164,7 @@ func TestAlertingRule(t *testing.T) { evalTime := baseTime.Add(test.time) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. @@ -190,152 +194,156 @@ func TestAlertingRule(t *testing.T) { } func TestForStateAddSamples(t *testing.T) { - storage := promql.LoadedStorage(t, ` + for _, queryOffset := range []time.Duration{0, time.Minute} { + t.Run(fmt.Sprintf("queryOffset %s", queryOffset.String()), func(t *testing.T) { + storage := promqltest.LoadedStorage(t, ` load 5m http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140 `) - t.Cleanup(func() { storage.Close() }) + t.Cleanup(func() { storage.Close() }) - expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) - require.NoError(t, err) + expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) + require.NoError(t, err) - rule := NewAlertingRule( - "HTTPRequestRateLow", - expr, - time.Minute, - 0, - labels.FromStrings("severity", "{{\"c\"}}ritical"), - labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, - ) - result := promql.Vector{ - promql.Sample{ - Metric: labels.FromStrings( - "__name__", "ALERTS_FOR_STATE", - "alertname", "HTTPRequestRateLow", - "group", "canary", - "instance", "0", - "job", "app-server", - "severity", "critical", - ), - F: 1, - }, - promql.Sample{ - Metric: labels.FromStrings( - "__name__", "ALERTS_FOR_STATE", - "alertname", "HTTPRequestRateLow", - "group", "canary", - "instance", "1", - "job", "app-server", - "severity", "critical", - ), - F: 1, - }, - promql.Sample{ - Metric: labels.FromStrings( - "__name__", "ALERTS_FOR_STATE", - "alertname", "HTTPRequestRateLow", - "group", "canary", - "instance", "0", - "job", "app-server", - "severity", "critical", - ), - F: 1, - }, - promql.Sample{ - Metric: labels.FromStrings( - "__name__", "ALERTS_FOR_STATE", - "alertname", "HTTPRequestRateLow", - "group", "canary", - "instance", "1", - "job", "app-server", - "severity", "critical", - ), - F: 1, - }, - } - - baseTime := time.Unix(0, 0) - - tests := []struct { - time time.Duration - result promql.Vector - persistThisTime bool 
// If true, it means this 'time' is persisted for 'for'. - }{ - { - time: 0, - result: append(promql.Vector{}, result[:2]...), - persistThisTime: true, - }, - { - time: 5 * time.Minute, - result: append(promql.Vector{}, result[2:]...), - }, - { - time: 10 * time.Minute, - result: append(promql.Vector{}, result[2:3]...), - }, - { - time: 15 * time.Minute, - result: nil, - }, - { - time: 20 * time.Minute, - result: nil, - }, - { - time: 25 * time.Minute, - result: append(promql.Vector{}, result[:1]...), - persistThisTime: true, - }, - { - time: 30 * time.Minute, - result: append(promql.Vector{}, result[2:3]...), - }, - } - - var forState float64 - for i, test := range tests { - t.Logf("case %d", i) - evalTime := baseTime.Add(test.time) - - if test.persistThisTime { - forState = float64(evalTime.Unix()) - } - if test.result == nil { - forState = float64(value.StaleNaN) - } - - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) - require.NoError(t, err) - - var filteredRes promql.Vector // After removing 'ALERTS' samples. - for _, smpl := range res { - smplName := smpl.Metric.Get("__name__") - if smplName == "ALERTS_FOR_STATE" { - filteredRes = append(filteredRes, smpl) - } else { - // If not 'ALERTS_FOR_STATE', it has to be 'ALERTS'. - require.Equal(t, "ALERTS", smplName) + rule := NewAlertingRule( + "HTTPRequestRateLow", + expr, + time.Minute, + 0, + labels.FromStrings("severity", "{{\"c\"}}ritical"), + labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, + ) + result := promql.Vector{ + promql.Sample{ + Metric: labels.FromStrings( + "__name__", "ALERTS_FOR_STATE", + "alertname", "HTTPRequestRateLow", + "group", "canary", + "instance", "0", + "job", "app-server", + "severity", "critical", + ), + F: 1, + }, + promql.Sample{ + Metric: labels.FromStrings( + "__name__", "ALERTS_FOR_STATE", + "alertname", "HTTPRequestRateLow", + "group", "canary", + "instance", "1", + "job", "app-server", + "severity", "critical", + ), + F: 1, + }, + promql.Sample{ + Metric: labels.FromStrings( + "__name__", "ALERTS_FOR_STATE", + "alertname", "HTTPRequestRateLow", + "group", "canary", + "instance", "0", + "job", "app-server", + "severity", "critical", + ), + F: 1, + }, + promql.Sample{ + Metric: labels.FromStrings( + "__name__", "ALERTS_FOR_STATE", + "alertname", "HTTPRequestRateLow", + "group", "canary", + "instance", "1", + "job", "app-server", + "severity", "critical", + ), + F: 1, + }, } - } - for i := range test.result { - test.result[i].T = timestamp.FromTime(evalTime) - // Updating the expected 'for' state. - if test.result[i].F >= 0 { - test.result[i].F = forState - } - } - require.Equal(t, len(test.result), len(filteredRes), "%d. Number of samples in expected and actual output don't match (%d vs. %d)", i, len(test.result), len(res)) - sort.Slice(filteredRes, func(i, j int) bool { - return labels.Compare(filteredRes[i].Metric, filteredRes[j].Metric) < 0 + baseTime := time.Unix(0, 0) + + tests := []struct { + time time.Duration + result promql.Vector + persistThisTime bool // If true, it means this 'time' is persisted for 'for'. 
+ }{ + { + time: 0, + result: append(promql.Vector{}, result[:2]...), + persistThisTime: true, + }, + { + time: 5 * time.Minute, + result: append(promql.Vector{}, result[2:]...), + }, + { + time: 10 * time.Minute, + result: append(promql.Vector{}, result[2:3]...), + }, + { + time: 15 * time.Minute, + result: nil, + }, + { + time: 20 * time.Minute, + result: nil, + }, + { + time: 25 * time.Minute, + result: append(promql.Vector{}, result[:1]...), + persistThisTime: true, + }, + { + time: 30 * time.Minute, + result: append(promql.Vector{}, result[2:3]...), + }, + } + + var forState float64 + for i, test := range tests { + t.Logf("case %d", i) + evalTime := baseTime.Add(test.time).Add(queryOffset) + + if test.persistThisTime { + forState = float64(evalTime.Unix()) + } + if test.result == nil { + forState = float64(value.StaleNaN) + } + + res, err := rule.Eval(context.TODO(), queryOffset, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + require.NoError(t, err) + + var filteredRes promql.Vector // After removing 'ALERTS' samples. + for _, smpl := range res { + smplName := smpl.Metric.Get("__name__") + if smplName == "ALERTS_FOR_STATE" { + filteredRes = append(filteredRes, smpl) + } else { + // If not 'ALERTS_FOR_STATE', it has to be 'ALERTS'. + require.Equal(t, "ALERTS", smplName) + } + } + for i := range test.result { + test.result[i].T = timestamp.FromTime(evalTime.Add(-queryOffset)) + // Updating the expected 'for' state. + if test.result[i].F >= 0 { + test.result[i].F = forState + } + } + require.Equal(t, len(test.result), len(filteredRes), "%d. Number of samples in expected and actual output don't match (%d vs. %d)", i, len(test.result), len(res)) + + sort.Slice(filteredRes, func(i, j int) bool { + return labels.Compare(filteredRes[i].Metric, filteredRes[j].Metric) < 0 + }) + prom_testutil.RequireEqual(t, test.result, filteredRes) + + for _, aa := range rule.ActiveAlerts() { + require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) + } + } }) - prom_testutil.RequireEqual(t, test.result, filteredRes) - - for _, aa := range rule.ActiveAlerts() { - require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) - } } } @@ -347,242 +355,251 @@ func sortAlerts(items []*Alert) { } func TestForStateRestore(t *testing.T) { - storage := promql.LoadedStorage(t, ` + for _, queryOffset := range []time.Duration{0, time.Minute} { + t.Run(fmt.Sprintf("queryOffset %s", queryOffset.String()), func(t *testing.T) { + storage := promqltest.LoadedStorage(t, ` load 5m http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 50 0 0 25 0 0 40 0 120 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 125 90 60 0 0 25 0 0 40 0 130 `) - t.Cleanup(func() { storage.Close() }) + t.Cleanup(func() { storage.Close() }) - expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) - require.NoError(t, err) + expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) + require.NoError(t, err) - opts := &ManagerOptions{ - QueryFunc: EngineQueryFunc(testEngine, storage), - Appendable: storage, - Queryable: storage, - Context: context.Background(), - Logger: log.NewNopLogger(), - NotifyFunc: func(ctx context.Context, expr string, alerts ...*Alert) {}, - OutageTolerance: 30 * time.Minute, - ForGracePeriod: 10 * time.Minute, - } + opts := &ManagerOptions{ + QueryFunc: 
EngineQueryFunc(testEngine, storage), + Appendable: storage, + Queryable: storage, + Context: context.Background(), + Logger: log.NewNopLogger(), + NotifyFunc: func(ctx context.Context, expr string, alerts ...*Alert) {}, + OutageTolerance: 30 * time.Minute, + ForGracePeriod: 10 * time.Minute, + } - alertForDuration := 25 * time.Minute - // Initial run before prometheus goes down. - rule := NewAlertingRule( - "HTTPRequestRateLow", - expr, - alertForDuration, - 0, - labels.FromStrings("severity", "critical"), - labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, - ) + alertForDuration := 25 * time.Minute + // Initial run before prometheus goes down. + rule := NewAlertingRule( + "HTTPRequestRateLow", + expr, + alertForDuration, + 0, + labels.FromStrings("severity", "critical"), + labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, + ) - group := NewGroup(GroupOptions{ - Name: "default", - Interval: time.Second, - Rules: []Rule{rule}, - ShouldRestore: true, - Opts: opts, - }) - groups := make(map[string]*Group) - groups["default;"] = group + group := NewGroup(GroupOptions{ + Name: "default", + Interval: time.Second, + Rules: []Rule{rule}, + ShouldRestore: true, + Opts: opts, + }) + groups := make(map[string]*Group) + groups["default;"] = group - initialRuns := []time.Duration{0, 5 * time.Minute} + initialRuns := []time.Duration{0, 5 * time.Minute} - baseTime := time.Unix(0, 0) - for _, duration := range initialRuns { - evalTime := baseTime.Add(duration) - group.Eval(context.TODO(), evalTime) - } + baseTime := time.Unix(0, 0) + for _, duration := range initialRuns { + evalTime := baseTime.Add(duration) + group.Eval(context.TODO(), evalTime) + } - exp := rule.ActiveAlerts() - for _, aa := range exp { - require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) - } - sort.Slice(exp, func(i, j int) bool { - return labels.Compare(exp[i].Labels, exp[j].Labels) < 0 - }) + // Prometheus goes down here. We create new rules and groups. + type testInput struct { + name string + restoreDuration time.Duration + expectedAlerts []*Alert - // Prometheus goes down here. We create new rules and groups. - type testInput struct { - restoreDuration time.Duration - alerts []*Alert + num int + noRestore bool + gracePeriod bool + downDuration time.Duration + before func() + } - num int - noRestore bool - gracePeriod bool - downDuration time.Duration - } + tests := []testInput{ + { + name: "normal restore (alerts were not firing)", + restoreDuration: 15 * time.Minute, + expectedAlerts: rule.ActiveAlerts(), + downDuration: 10 * time.Minute, + }, + { + name: "outage tolerance", + restoreDuration: 40 * time.Minute, + noRestore: true, + num: 2, + }, + { + name: "no active alerts", + restoreDuration: 50 * time.Minute, + expectedAlerts: []*Alert{}, + }, + { + name: "test the grace period", + restoreDuration: 25 * time.Minute, + expectedAlerts: []*Alert{}, + gracePeriod: true, + before: func() { + for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { + evalTime := baseTime.Add(duration) + group.Eval(context.TODO(), evalTime) + } + }, + num: 2, + }, + } - tests := []testInput{ - { - // Normal restore (alerts were not firing). - restoreDuration: 15 * time.Minute, - alerts: rule.ActiveAlerts(), - downDuration: 10 * time.Minute, - }, - { - // Testing Outage Tolerance. - restoreDuration: 40 * time.Minute, - noRestore: true, - num: 2, - }, - { - // No active alerts. 
- restoreDuration: 50 * time.Minute, - alerts: []*Alert{}, - }, - } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.before != nil { + tt.before() + } - testFunc := func(tst testInput) { - newRule := NewAlertingRule( - "HTTPRequestRateLow", - expr, - alertForDuration, - 0, - labels.FromStrings("severity", "critical"), - labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil, - ) - newGroup := NewGroup(GroupOptions{ - Name: "default", - Interval: time.Second, - Rules: []Rule{newRule}, - ShouldRestore: true, - Opts: opts, + newRule := NewAlertingRule( + "HTTPRequestRateLow", + expr, + alertForDuration, + 0, + labels.FromStrings("severity", "critical"), + labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil, + ) + newGroup := NewGroup(GroupOptions{ + Name: "default", + Interval: time.Second, + Rules: []Rule{newRule}, + ShouldRestore: true, + Opts: opts, + QueryOffset: &queryOffset, + }) + + newGroups := make(map[string]*Group) + newGroups["default;"] = newGroup + + restoreTime := baseTime.Add(tt.restoreDuration).Add(queryOffset) + // First eval before restoration. + newGroup.Eval(context.TODO(), restoreTime) + // Restore happens here. + newGroup.RestoreForState(restoreTime) + + got := newRule.ActiveAlerts() + for _, aa := range got { + require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) + } + sort.Slice(got, func(i, j int) bool { + return labels.Compare(got[i].Labels, got[j].Labels) < 0 + }) + + // In all cases, we expect the restoration process to have completed. + require.Truef(t, newRule.Restored(), "expected the rule restoration process to have completed") + + // Checking if we have restored it correctly. + switch { + case tt.noRestore: + require.Len(t, got, tt.num) + for _, e := range got { + require.Equal(t, e.ActiveAt, restoreTime) + } + case tt.gracePeriod: + + require.Len(t, got, tt.num) + for _, e := range got { + require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) + } + default: + exp := tt.expectedAlerts + require.Equal(t, len(exp), len(got)) + sortAlerts(exp) + sortAlerts(got) + for i, e := range exp { + require.Equal(t, e.Labels, got[i].Labels) + + // Difference in time should be within 1e6 ns, i.e. 1ms + // (due to conversion between ns & ms, float64 & int64). + activeAtDiff := queryOffset.Seconds() + float64(e.ActiveAt.Unix()+int64(tt.downDuration/time.Second)-got[i].ActiveAt.Unix()) + require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong") + } + } + }) + } }) - - newGroups := make(map[string]*Group) - newGroups["default;"] = newGroup - - restoreTime := baseTime.Add(tst.restoreDuration) - // First eval before restoration. - newGroup.Eval(context.TODO(), restoreTime) - // Restore happens here. - newGroup.RestoreForState(restoreTime) - - got := newRule.ActiveAlerts() - for _, aa := range got { - require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) - } - sort.Slice(got, func(i, j int) bool { - return labels.Compare(got[i].Labels, got[j].Labels) < 0 - }) - - // Checking if we have restored it correctly. 
- switch { - case tst.noRestore: - require.Len(t, got, tst.num) - for _, e := range got { - require.Equal(t, e.ActiveAt, restoreTime) - } - case tst.gracePeriod: - require.Len(t, got, tst.num) - for _, e := range got { - require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) - } - default: - exp := tst.alerts - require.Equal(t, len(exp), len(got)) - sortAlerts(exp) - sortAlerts(got) - for i, e := range exp { - require.Equal(t, e.Labels, got[i].Labels) - - // Difference in time should be within 1e6 ns, i.e. 1ms - // (due to conversion between ns & ms, float64 & int64). - activeAtDiff := float64(e.ActiveAt.Unix() + int64(tst.downDuration/time.Second) - got[i].ActiveAt.Unix()) - require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong") - } - } } - - for _, tst := range tests { - testFunc(tst) - } - - // Testing the grace period. - for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { - evalTime := baseTime.Add(duration) - group.Eval(context.TODO(), evalTime) - } - testFunc(testInput{ - restoreDuration: 25 * time.Minute, - alerts: []*Alert{}, - gracePeriod: true, - num: 2, - }) } func TestStaleness(t *testing.T) { - st := teststorage.New(t) - defer st.Close() - engineOpts := promql.EngineOpts{ - Logger: nil, - Reg: nil, - MaxSamples: 10, - Timeout: 10 * time.Second, + for _, queryOffset := range []time.Duration{0, time.Minute} { + st := teststorage.New(t) + defer st.Close() + engineOpts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 10, + Timeout: 10 * time.Second, + } + engine := promql.NewEngine(engineOpts) + opts := &ManagerOptions{ + QueryFunc: EngineQueryFunc(engine, st), + Appendable: st, + Queryable: st, + Context: context.Background(), + Logger: log.NewNopLogger(), + } + + expr, err := parser.ParseExpr("a + 1") + require.NoError(t, err) + rule := NewRecordingRule("a_plus_one", expr, labels.Labels{}) + group := NewGroup(GroupOptions{ + Name: "default", + Interval: time.Second, + Rules: []Rule{rule}, + ShouldRestore: true, + Opts: opts, + QueryOffset: &queryOffset, + }) + + // A time series that has two samples and then goes stale. + app := st.Appender(context.Background()) + app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 0, 1) + app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 1000, 2) + app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 2000, math.Float64frombits(value.StaleNaN)) + + err = app.Commit() + require.NoError(t, err) + + ctx := context.Background() + + // Execute 3 times, 1 second apart. + group.Eval(ctx, time.Unix(0, 0).Add(queryOffset)) + group.Eval(ctx, time.Unix(1, 0).Add(queryOffset)) + group.Eval(ctx, time.Unix(2, 0).Add(queryOffset)) + + querier, err := st.Querier(0, 2000) + require.NoError(t, err) + defer querier.Close() + + matcher, err := labels.NewMatcher(labels.MatchEqual, model.MetricNameLabel, "a_plus_one") + require.NoError(t, err) + + set := querier.Select(ctx, false, nil, matcher) + samples, err := readSeriesSet(set) + require.NoError(t, err) + + metric := labels.FromStrings(model.MetricNameLabel, "a_plus_one").String() + metricSample, ok := samples[metric] + + require.True(t, ok, "Series %s not returned.", metric) + require.True(t, value.IsStaleNaN(metricSample[2].F), "Appended second sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(metricSample[2].F)) + metricSample[2].F = 42 // require.Equal cannot handle NaN. 
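+ // The group was evaluated at queryOffset-shifted times, so the recorded samples are
+ // expected at the offset-adjusted timestamps 0, 1000 and 2000.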
+ + want := map[string][]promql.FPoint{ + metric: {{T: 0, F: 2}, {T: 1000, F: 3}, {T: 2000, F: 42}}, + } + + require.Equal(t, want, samples) } - engine := promql.NewEngine(engineOpts) - opts := &ManagerOptions{ - QueryFunc: EngineQueryFunc(engine, st), - Appendable: st, - Queryable: st, - Context: context.Background(), - Logger: log.NewNopLogger(), - } - - expr, err := parser.ParseExpr("a + 1") - require.NoError(t, err) - rule := NewRecordingRule("a_plus_one", expr, labels.Labels{}) - group := NewGroup(GroupOptions{ - Name: "default", - Interval: time.Second, - Rules: []Rule{rule}, - ShouldRestore: true, - Opts: opts, - }) - - // A time series that has two samples and then goes stale. - app := st.Appender(context.Background()) - app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 0, 1) - app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 1000, 2) - app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 2000, math.Float64frombits(value.StaleNaN)) - - err = app.Commit() - require.NoError(t, err) - - ctx := context.Background() - - // Execute 3 times, 1 second apart. - group.Eval(ctx, time.Unix(0, 0)) - group.Eval(ctx, time.Unix(1, 0)) - group.Eval(ctx, time.Unix(2, 0)) - - querier, err := st.Querier(0, 2000) - require.NoError(t, err) - defer querier.Close() - - matcher, err := labels.NewMatcher(labels.MatchEqual, model.MetricNameLabel, "a_plus_one") - require.NoError(t, err) - - set := querier.Select(ctx, false, nil, matcher) - samples, err := readSeriesSet(set) - require.NoError(t, err) - - metric := labels.FromStrings(model.MetricNameLabel, "a_plus_one").String() - metricSample, ok := samples[metric] - - require.True(t, ok, "Series %s not returned.", metric) - require.True(t, value.IsStaleNaN(metricSample[2].F), "Appended second sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(metricSample[2].F)) - metricSample[2].F = 42 // require.Equal cannot handle NaN. - - want := map[string][]promql.FPoint{ - metric: {{T: 0, F: 2}, {T: 1000, F: 3}, {T: 2000, F: 42}}, - } - - require.Equal(t, want, samples) } // Convert a SeriesSet into a form usable with require.Equal. @@ -606,6 +623,46 @@ func readSeriesSet(ss storage.SeriesSet) (map[string][]promql.FPoint, error) { return result, ss.Err() } +func TestGroup_QueryOffset(t *testing.T) { + config := ` +groups: + - name: group1 + query_offset: 2m + - name: group2 + query_offset: 0s + - name: group3 +` + + dir := t.TempDir() + fname := path.Join(dir, "rules.yaml") + err := os.WriteFile(fname, []byte(config), fs.ModePerm) + require.NoError(t, err) + + m := NewManager(&ManagerOptions{ + Logger: log.NewNopLogger(), + DefaultRuleQueryOffset: func() time.Duration { + return time.Minute + }, + }) + m.start() + err = m.Update(time.Second, []string{fname}, labels.EmptyLabels(), "", nil) + require.NoError(t, err) + + rgs := m.RuleGroups() + sort.Slice(rgs, func(i, j int) bool { + return rgs[i].Name() < rgs[j].Name() + }) + + // From config. + require.Equal(t, 2*time.Minute, rgs[0].QueryOffset()) + // Setting 0 in config is detected. + require.Equal(t, time.Duration(0), rgs[1].QueryOffset()) + // Default when nothing is set. 
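+ // (group3 sets no query_offset, so the manager's DefaultRuleQueryOffset applies.)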
+ require.Equal(t, time.Minute, rgs[2].QueryOffset()) + + m.Stop() +} + func TestCopyState(t *testing.T) { oldGroup := &Group{ rules: []Rule{ @@ -1231,7 +1288,7 @@ func TestRuleHealthUpdates(t *testing.T) { } func TestRuleGroupEvalIterationFunc(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 5m http_requests{instance="0"} 75 85 50 0 0 25 0 0 40 0 120 `) @@ -1359,7 +1416,7 @@ func TestNativeHistogramsInRecordingRules(t *testing.T) { ts := time.Now() app := db.Appender(context.Background()) for i, h := range hists { - l := labels.FromStrings("__name__", "histogram_metric", "idx", fmt.Sprintf("%d", i)) + l := labels.FromStrings("__name__", "histogram_metric", "idx", strconv.Itoa(i)) _, err := app.AppendHistogram(0, l, ts.UnixMilli(), h.Copy(), nil) require.NoError(t, err) } @@ -2041,7 +2098,7 @@ func TestBoundedRuleEvalConcurrency(t *testing.T) { require.EqualValues(t, maxInflight.Load(), int32(maxConcurrency)+int32(groupCount)) } -const artificialDelay = 10 * time.Millisecond +const artificialDelay = 15 * time.Millisecond func optsFactory(storage storage.Storage, maxInflight, inflightQueries *atomic.Int32, maxConcurrent int64) *ManagerOptions { var inflightMu sync.Mutex diff --git a/rules/origin_test.go b/rules/origin_test.go index ca466301d..75c83f9a4 100644 --- a/rules/origin_test.go +++ b/rules/origin_test.go @@ -31,7 +31,7 @@ type unknownRule struct{} func (u unknownRule) Name() string { return "" } func (u unknownRule) Labels() labels.Labels { return labels.EmptyLabels() } -func (u unknownRule) Eval(context.Context, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error) { +func (u unknownRule) Eval(context.Context, time.Duration, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error) { return nil, nil } func (u unknownRule) String() string { return "" } diff --git a/rules/recording.go b/rules/recording.go index e2b0a31a0..17a75fdd1 100644 --- a/rules/recording.go +++ b/rules/recording.go @@ -77,10 +77,9 @@ func (rule *RecordingRule) Labels() labels.Labels { } // Eval evaluates the rule and then overrides the metric names and labels accordingly. 
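+// The vector expression is queried at ts.Add(-queryOffset), so the returned samples carry
+// the offset-adjusted timestamp rather than the nominal evaluation time.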
-func (rule *RecordingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, _ *url.URL, limit int) (promql.Vector, error) { +func (rule *RecordingRule) Eval(ctx context.Context, queryOffset time.Duration, ts time.Time, query QueryFunc, _ *url.URL, limit int) (promql.Vector, error) { ctx = NewOriginContext(ctx, NewRuleDetail(rule)) - - vector, err := query(ctx, rule.vector.String(), ts) + vector, err := query(ctx, rule.vector.String(), ts.Add(-queryOffset)) if err != nil { return nil, err } diff --git a/rules/recording_test.go b/rules/recording_test.go index 24b7d6539..fdddd4e02 100644 --- a/rules/recording_test.go +++ b/rules/recording_test.go @@ -24,6 +24,7 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" ) @@ -111,7 +112,7 @@ var ruleEvalTestScenarios = []struct { } func setUpRuleEvalTest(t require.TestingT) *teststorage.TestStorage { - return promql.LoadedStorage(t, ` + return promqltest.LoadedStorage(t, ` load 1m metric{label_a="1",label_b="3"} 1 metric{label_a="2",label_b="4"} 10 @@ -125,7 +126,7 @@ func TestRuleEval(t *testing.T) { for _, scenario := range ruleEvalTestScenarios { t.Run(scenario.name, func(t *testing.T) { rule := NewRecordingRule("test_rule", scenario.expr, scenario.ruleLabels) - result, err := rule.Eval(context.TODO(), ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0) + result, err := rule.Eval(context.TODO(), 0, ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) testutil.RequireEqual(t, scenario.expected, result) }) @@ -143,7 +144,7 @@ func BenchmarkRuleEval(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - _, err := rule.Eval(context.TODO(), ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0) + _, err := rule.Eval(context.TODO(), 0, ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0) if err != nil { require.NoError(b, err) } @@ -172,13 +173,13 @@ func TestRuleEvalDuplicate(t *testing.T) { expr, _ := parser.ParseExpr(`vector(0) or label_replace(vector(0),"test","x","","")`) rule := NewRecordingRule("foo", expr, labels.FromStrings("test", "test")) - _, err := rule.Eval(ctx, now, EngineQueryFunc(engine, storage), nil, 0) + _, err := rule.Eval(ctx, 0, now, EngineQueryFunc(engine, storage), nil, 0) require.Error(t, err) require.EqualError(t, err, "vector contains metrics with the same labelset after applying rule labels") } func TestRecordingRuleLimit(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m metric{label="1"} 1 metric{label="2"} 1 @@ -214,7 +215,7 @@ func TestRecordingRuleLimit(t *testing.T) { evalTime := time.Unix(0, 0) for _, test := range tests { - switch _, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); { + switch _, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); { case err != nil: require.EqualError(t, err, test.err) case test.err != "": @@ -242,7 +243,7 @@ func TestRecordingEvalWithOrigin(t *testing.T) { require.NoError(t, err) rule := NewRecordingRule(name, expr, lbs) - _, err = rule.Eval(ctx, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) { + _, err = rule.Eval(ctx, 0, now, func(ctx context.Context, qs 
string, _ time.Time) (promql.Vector, error) { detail = FromOriginContext(ctx) return nil, nil }, nil, 0) diff --git a/rules/rule.go b/rules/rule.go index 59af3e0bb..687c03d00 100644 --- a/rules/rule.go +++ b/rules/rule.go @@ -40,7 +40,7 @@ type Rule interface { // Labels of the rule. Labels() labels.Labels // Eval evaluates the rule, including any associated recording or alerting actions. - Eval(context.Context, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error) + Eval(ctx context.Context, queryOffset time.Duration, evaluationTime time.Time, queryFunc QueryFunc, externalURL *url.URL, limit int) (promql.Vector, error) // String returns a human-readable string representation of the rule. String() string // Query returns the rule query expression. diff --git a/scrape/manager.go b/scrape/manager.go index a7a8b828e..cb92db5a8 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -81,6 +81,8 @@ type Options struct { // Option to enable the ingestion of the created timestamp as a synthetic zero sample. // See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md EnableCreatedTimestampZeroIngestion bool + // Option to enable the ingestion of native histograms. + EnableNativeHistogramsIngestion bool // Optional HTTP client options to use when scraping. HTTPClientOptions []config_util.HTTPClientOption diff --git a/scrape/scrape.go b/scrape/scrape.go index 4bbeab57a..c285f05e3 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -178,6 +178,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed opts.interval, opts.timeout, opts.scrapeClassicHistograms, + options.EnableNativeHistogramsIngestion, options.EnableCreatedTimestampZeroIngestion, options.ExtraMetrics, options.EnableMetadataStorage, @@ -827,7 +828,10 @@ type scrapeLoop struct { interval time.Duration timeout time.Duration scrapeClassicHistograms bool - enableCTZeroIngestion bool + + // Feature flagged options. 
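+ // enableNativeHistogramIngestion gates whether scraped native histogram samples are appended to storage.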
+ enableNativeHistogramIngestion bool + enableCTZeroIngestion bool appender func(ctx context.Context) storage.Appender symbolTable *labels.SymbolTable @@ -1123,6 +1127,7 @@ func newScrapeLoop(ctx context.Context, interval time.Duration, timeout time.Duration, scrapeClassicHistograms bool, + enableNativeHistogramIngestion bool, enableCTZeroIngestion bool, reportExtraMetrics bool, appendMetadataToWAL bool, @@ -1153,33 +1158,34 @@ func newScrapeLoop(ctx context.Context, } sl := &scrapeLoop{ - scraper: sc, - buffers: buffers, - cache: cache, - appender: appender, - symbolTable: symbolTable, - sampleMutator: sampleMutator, - reportSampleMutator: reportSampleMutator, - stopped: make(chan struct{}), - offsetSeed: offsetSeed, - l: l, - parentCtx: ctx, - appenderCtx: appenderCtx, - honorTimestamps: honorTimestamps, - trackTimestampsStaleness: trackTimestampsStaleness, - enableCompression: enableCompression, - sampleLimit: sampleLimit, - bucketLimit: bucketLimit, - maxSchema: maxSchema, - labelLimits: labelLimits, - interval: interval, - timeout: timeout, - scrapeClassicHistograms: scrapeClassicHistograms, - enableCTZeroIngestion: enableCTZeroIngestion, - reportExtraMetrics: reportExtraMetrics, - appendMetadataToWAL: appendMetadataToWAL, - metrics: metrics, - skipOffsetting: skipOffsetting, + scraper: sc, + buffers: buffers, + cache: cache, + appender: appender, + symbolTable: symbolTable, + sampleMutator: sampleMutator, + reportSampleMutator: reportSampleMutator, + stopped: make(chan struct{}), + offsetSeed: offsetSeed, + l: l, + parentCtx: ctx, + appenderCtx: appenderCtx, + honorTimestamps: honorTimestamps, + trackTimestampsStaleness: trackTimestampsStaleness, + enableCompression: enableCompression, + sampleLimit: sampleLimit, + bucketLimit: bucketLimit, + maxSchema: maxSchema, + labelLimits: labelLimits, + interval: interval, + timeout: timeout, + scrapeClassicHistograms: scrapeClassicHistograms, + enableNativeHistogramIngestion: enableNativeHistogramIngestion, + enableCTZeroIngestion: enableCTZeroIngestion, + reportExtraMetrics: reportExtraMetrics, + appendMetadataToWAL: appendMetadataToWAL, + metrics: metrics, + skipOffsetting: skipOffsetting, } sl.ctx, sl.cancel = context.WithCancel(ctx) @@ -1627,7 +1633,7 @@ loop: } } - if isHistogram { + if isHistogram && sl.enableNativeHistogramIngestion { if h != nil { ref, err = app.AppendHistogram(ref, lset, t, h, nil) } else { diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 20b21936b..b5a31cb65 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -678,6 +678,7 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app false, false, false, + false, nil, false, newTestScrapeMetrics(t), @@ -819,6 +820,7 @@ func TestScrapeLoopRun(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, @@ -962,6 +964,7 @@ func TestScrapeLoopMetadata(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, @@ -1282,7 +1285,7 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { for i := 0; i < 500; i++ { s = fmt.Sprintf("%smetric_%d_%d 42\n", s, i, numScrapes) } - w.Write([]byte(fmt.Sprintf(s + "&"))) + w.Write([]byte(s + "&")) } else { cancel() } @@ -1571,6 +1574,7 @@ func TestScrapeLoop_HistogramBucketLimit(t *testing.T) { app := &bucketLimitAppender{Appender: resApp, limit: 2} sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + sl.enableNativeHistogramIngestion = true sl.sampleMutator = func(l 
labels.Labels) labels.Labels { if l.Has("deleteme") { return labels.EmptyLabels() @@ -1797,14 +1801,15 @@ func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { func TestScrapeLoopAppendExemplar(t *testing.T) { tests := []struct { - title string - scrapeClassicHistograms bool - scrapeText string - contentType string - discoveryLabels []string - floats []floatSample - histograms []histogramSample - exemplars []exemplar.Exemplar + title string + scrapeClassicHistograms bool + enableNativeHistogramsIngestion bool + scrapeText string + contentType string + discoveryLabels []string + floats []floatSample + histograms []histogramSample + exemplars []exemplar.Exemplar }{ { title: "Metric without exemplars", @@ -1862,6 +1867,8 @@ metric_total{n="2"} 2 # {t="2"} 2.0 20000 }, { title: "Native histogram with three exemplars", + + enableNativeHistogramsIngestion: true, scrapeText: `name: "test_histogram" help: "Test histogram with many buckets removed to keep it manageable in size." type: HISTOGRAM @@ -1976,6 +1983,8 @@ metric: < }, { title: "Native histogram with three exemplars scraped as classic histogram", + + enableNativeHistogramsIngestion: true, scrapeText: `name: "test_histogram" help: "Test histogram with many buckets removed to keep it manageable in size." type: HISTOGRAM @@ -2115,6 +2124,7 @@ metric: < } sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) + sl.enableNativeHistogramIngestion = test.enableNativeHistogramsIngestion sl.sampleMutator = func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, discoveryLabels, false, nil) } @@ -3710,7 +3720,7 @@ scrape_configs: s.DB.EnableNativeHistograms() reg := prometheus.NewRegistry() - mng, err := NewManager(nil, nil, s, reg) + mng, err := NewManager(&Options{EnableNativeHistogramsIngestion: true}, nil, s, reg) require.NoError(t, err) cfg, err := config.Load(configStr, false, log.NewNopLogger()) require.NoError(t, err) diff --git a/scrape/target_test.go b/scrape/target_test.go index f91e31050..b43ff2406 100644 --- a/scrape/target_test.go +++ b/scrape/target_test.go @@ -21,6 +21,7 @@ import ( "net/http/httptest" "net/url" "os" + "strconv" "strings" "testing" "time" @@ -67,7 +68,7 @@ func TestTargetOffset(t *testing.T) { // Calculate offsets for 10000 different targets. 
for i := range offsets { target := newTestTarget("example.com:80", 0, labels.FromStrings( - "label", fmt.Sprintf("%d", i), + "label", strconv.Itoa(i), )) offsets[i] = target.offset(interval, offsetSeed) } diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index a7a40c1be..5ceb59099 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -24,8 +24,8 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 - - name: install Go + uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + - name: Install Go uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: go-version: 1.22.x @@ -33,6 +33,7 @@ jobs: run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' - name: Lint - uses: golangci/golangci-lint-action@3cfe3a4abbb849e10058ce4af15d205b6da42804 # v4.0.0 + uses: golangci/golangci-lint-action@a4f60bb28d35aeee14e6880718e0c85ff1882e64 # v6.0.1 with: - version: v1.56.2 + args: --verbose + version: v1.59.0 diff --git a/storage/remote/azuread/azuread.go b/storage/remote/azuread/azuread.go index e2058fb54..58520c6a5 100644 --- a/storage/remote/azuread/azuread.go +++ b/storage/remote/azuread/azuread.go @@ -75,7 +75,7 @@ type AzureADConfig struct { //nolint:revive // exported. // OAuth is the oauth config that is being used to authenticate. OAuth *OAuthConfig `yaml:"oauth,omitempty"` - // OAuth is the oauth config that is being used to authenticate. + // SDK is the SDK config that is being used to authenticate. SDK *SDKConfig `yaml:"sdk,omitempty"` // Cloud is the Azure cloud in which the service is running. Example: AzurePublic/AzureGovernment/AzureChina. diff --git a/storage/remote/otlptranslator/README.md b/storage/remote/otlptranslator/README.md deleted file mode 100644 index 774fac5a7..000000000 --- a/storage/remote/otlptranslator/README.md +++ /dev/null @@ -1,22 +0,0 @@ -## Copying from opentelemetry/opentelemetry-collector-contrib - -This files in the `prometheus/` and `prometheusremotewrite/` are copied from the OpenTelemetry Project[^1]. - -This is done instead of adding a go.mod dependency because OpenTelemetry depends on `prometheus/prometheus` and a cyclic dependency will be created. This is just a temporary solution and the long-term solution is to move the required packages from OpenTelemetry into `prometheus/prometheus`. - -To update the dependency is a multi-step process: -1. Vendor the latest `prometheus/prometheus`@`main` into [`opentelemetry/opentelemetry-collector-contrib`](https://github.com/open-telemetry/opentelemetry-collector-contrib) -1. Update the VERSION in `update-copy.sh`. -1. Run `./update-copy.sh`. - -### Why copy? - -This is because the packages we copy depend on the [`prompb`](https://github.com/prometheus/prometheus/blob/main/prompb) package. While the package is relatively stable, there are still changes. For example, https://github.com/prometheus/prometheus/pull/11935 changed the types. -This means if we depend on the upstream packages directly, we will never able to make the changes like above. Hence we're copying the code for now. - -### I need to manually change these files - -When we do want to make changes to the types in `prompb`, we might need to edit the files directly. 
That is OK, please let @gouthamve or @jesusvazquez know so they can take care of updating the upstream code (by vendoring in `prometheus/prometheus` upstream and resolving conflicts) and then will run the copy -script again to keep things updated. - -[^1]: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/translator/prometheus and https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/translator/prometheusremotewrite diff --git a/storage/remote/otlptranslator/prometheus/normalize_label.go b/storage/remote/otlptranslator/prometheus/normalize_label.go index a6b41d1c3..6360aa976 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label.go @@ -1,22 +1,24 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheus/normalize_label.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheus // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" +package prometheus import ( "strings" "unicode" - - "go.opentelemetry.io/collector/featuregate" -) - -var dropSanitizationGate = featuregate.GlobalRegistry().MustRegister( - "pkg.translator.prometheus.PermissiveLabelSanitization", - featuregate.StageAlpha, - featuregate.WithRegisterDescription("Controls whether to change labels starting with '_' to 'key_'."), - featuregate.WithRegisterReferenceURL("https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/8950"), ) // Normalizes the specified label to follow Prometheus label names standard @@ -39,7 +41,7 @@ func NormalizeLabel(label string) string { // If label starts with a number, prepend with "key_" if unicode.IsDigit(rune(label[0])) { label = "key_" + label - } else if strings.HasPrefix(label, "_") && !strings.HasPrefix(label, "__") && !dropSanitizationGate.IsEnabled() { + } else if strings.HasPrefix(label, "_") && !strings.HasPrefix(label, "__") { label = "key" + label } diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index a976dfb48..4cf36671a 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -1,15 +1,25 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheus/normalize_name.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheus // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" +package prometheus import ( "strings" "unicode" - "go.opentelemetry.io/collector/featuregate" "go.opentelemetry.io/collector/pdata/pmetric" ) @@ -67,13 +77,6 @@ var perUnitMap = map[string]string{ "y": "year", } -var normalizeNameGate = featuregate.GlobalRegistry().MustRegister( - "pkg.translator.prometheus.NormalizeName", - featuregate.StageBeta, - featuregate.WithRegisterDescription("Controls whether metrics names are automatically normalized to follow Prometheus naming convention"), - featuregate.WithRegisterReferenceURL("https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/8950"), -) - // BuildCompliantName builds a Prometheus-compliant metric name for the specified metric // // Metric name is prefixed with specified namespace and underscore (if any). @@ -86,7 +89,7 @@ func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffix var metricName string // Full normalization following standard Prometheus naming conventions - if addMetricSuffixes && normalizeNameGate.IsEnabled() { + if addMetricSuffixes { return normalizeName(metric, namespace) } diff --git a/storage/remote/otlptranslator/prometheus/unit_to_ucum.go b/storage/remote/otlptranslator/prometheus/unit_to_ucum.go index 718a52067..1f8bf1a63 100644 --- a/storage/remote/otlptranslator/prometheus/unit_to_ucum.go +++ b/storage/remote/otlptranslator/prometheus/unit_to_ucum.go @@ -1,9 +1,20 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheus/unit_to_ucum.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. 
-// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheus // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" +package prometheus import "strings" diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index 817cbaba7..68be82e44 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -1,29 +1,42 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/helper.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( "encoding/hex" "fmt" "log" "math" + "slices" "sort" "strconv" - "strings" "time" "unicode/utf8" + "github.com/cespare/xxhash/v2" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/timestamp" - "github.com/prometheus/prometheus/model/value" - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" conventions "go.opentelemetry.io/collector/semconv/v1.6.1" + "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/prompb" + prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) @@ -48,7 +61,7 @@ const ( ) type bucketBoundsData struct { - sig string + ts *prompb.TimeSeries bound float64 } @@ -66,94 +79,47 @@ func (a ByLabelName) Len() int { return len(a) } func (a ByLabelName) Less(i, j int) bool { return a[i].Name < a[j].Name } func (a ByLabelName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -// addSample finds a TimeSeries in tsMap that corresponds to the label set labels, and add sample to the TimeSeries; it -// creates a new TimeSeries in the map if not found and returns the time series signature. -// tsMap will be unmodified if either labels or sample is nil, but can still be modified if the exemplar is nil. 
-func addSample(tsMap map[string]*prompb.TimeSeries, sample *prompb.Sample, labels []prompb.Label, - datatype string) string { - if sample == nil || labels == nil || tsMap == nil { - // This shouldn't happen - return "" - } - - sig := timeSeriesSignature(datatype, labels) - ts := tsMap[sig] - if ts != nil { - ts.Samples = append(ts.Samples, *sample) - } else { - newTs := &prompb.TimeSeries{ - Labels: labels, - Samples: []prompb.Sample{*sample}, - } - tsMap[sig] = newTs - } - - return sig -} - -// addExemplars finds a bucket bound that corresponds to the exemplars value and add the exemplar to the specific sig; -// we only add exemplars if samples are presents -// tsMap is unmodified if either of its parameters is nil and samples are nil. -func addExemplars(tsMap map[string]*prompb.TimeSeries, exemplars []prompb.Exemplar, bucketBoundsData []bucketBoundsData) { - if len(tsMap) == 0 || len(bucketBoundsData) == 0 || len(exemplars) == 0 { - return - } - - sort.Sort(byBucketBoundsData(bucketBoundsData)) - - for _, exemplar := range exemplars { - addExemplar(tsMap, bucketBoundsData, exemplar) - } -} - -func addExemplar(tsMap map[string]*prompb.TimeSeries, bucketBounds []bucketBoundsData, exemplar prompb.Exemplar) { - for _, bucketBound := range bucketBounds { - sig := bucketBound.sig - bound := bucketBound.bound - - ts := tsMap[sig] - if ts != nil && len(ts.Samples) > 0 && exemplar.Value <= bound { - ts.Exemplars = append(ts.Exemplars, exemplar) - return - } - } -} - -// timeSeries return a string signature in the form of: -// -// TYPE-label1-value1- ... -labelN-valueN -// -// the label slice should not contain duplicate label names; this method sorts the slice by label name before creating +// timeSeriesSignature returns a hashed label set signature. +// The label slice should not contain duplicate label names; this method sorts the slice by label name before creating // the signature. -func timeSeriesSignature(datatype string, labels []prompb.Label) string { - length := len(datatype) - - for _, lb := range labels { - length += 2 + len(lb.GetName()) + len(lb.GetValue()) - } - - b := strings.Builder{} - b.Grow(length) - b.WriteString(datatype) - +// The algorithm is the same as in Prometheus' labels.StableHash function. +func timeSeriesSignature(labels []prompb.Label) uint64 { sort.Sort(ByLabelName(labels)) - for _, lb := range labels { - b.WriteString("-") - b.WriteString(lb.GetName()) - b.WriteString("-") - b.WriteString(lb.GetValue()) - } + // Use xxhash.Sum64(b) for fast path as it's faster. + b := make([]byte, 0, 1024) + for i, v := range labels { + if len(b)+len(v.Name)+len(v.Value)+2 >= cap(b) { + // If labels entry is 1KB+ do not allocate whole entry. + h := xxhash.New() + _, _ = h.Write(b) + for _, v := range labels[i:] { + _, _ = h.WriteString(v.Name) + _, _ = h.Write(seps) + _, _ = h.WriteString(v.Value) + _, _ = h.Write(seps) + } + return h.Sum64() + } - return b.String() + b = append(b, v.Name...) + b = append(b, seps[0]) + b = append(b, v.Value...) + b = append(b, seps[0]) + } + return xxhash.Sum64(b) } +var seps = []byte{'\xff'} + // createAttributes creates a slice of Prometheus Labels with OTLP attributes and pairs of string values. -// Unpaired string values are ignored. String pairs overwrite OTLP labels if collisions happen, and overwrites are -// logged. Resulting label names are sanitized. 
-func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externalLabels map[string]string, extras ...string) []prompb.Label { - serviceName, haveServiceName := resource.Attributes().Get(conventions.AttributeServiceName) - instance, haveInstanceID := resource.Attributes().Get(conventions.AttributeServiceInstanceID) +// Unpaired string values are ignored. String pairs overwrite OTLP labels if collisions happen and +// if logOnOverwrite is true, the overwrite is logged. Resulting label names are sanitized. +func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externalLabels map[string]string, + ignoreAttrs []string, logOnOverwrite bool, extras ...string) []prompb.Label { + resourceAttrs := resource.Attributes() + serviceName, haveServiceName := resourceAttrs.Get(conventions.AttributeServiceName) + instance, haveInstanceID := resourceAttrs.Get(conventions.AttributeServiceInstanceID) // Calculate the maximum possible number of labels we could return so we can preallocate l maxLabelCount := attributes.Len() + len(externalLabels) + len(extras)/2 @@ -171,9 +137,13 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa // Ensure attributes are sorted by key for consistent merging of keys which // collide when sanitized. - labels := make([]prompb.Label, 0, attributes.Len()) + labels := make([]prompb.Label, 0, maxLabelCount) + // XXX: Should we always drop service namespace/service name/service instance ID from the labels + // (as they get mapped to other Prometheus labels)? attributes.Range(func(key string, value pcommon.Value) bool { - labels = append(labels, prompb.Label{Name: key, Value: value.AsString()}) + if !slices.Contains(ignoreAttrs, key) { + labels = append(labels, prompb.Label{Name: key, Value: value.AsString()}) + } return true }) sort.Stable(ByLabelName(labels)) @@ -190,7 +160,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa // Map service.name + service.namespace to job if haveServiceName { val := serviceName.AsString() - if serviceNamespace, ok := resource.Attributes().Get(conventions.AttributeServiceNamespace); ok { + if serviceNamespace, ok := resourceAttrs.Get(conventions.AttributeServiceNamespace); ok { val = fmt.Sprintf("%s/%s", serviceNamespace.AsString(), val) } l[model.JobLabel] = val @@ -213,7 +183,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa break } _, found := l[extras[i]] - if found { + if found && logOnOverwrite { log.Println("label " + extras[i] + " is overwritten. Check if Prometheus reserved labels are used.") } // internal labels should be maintained @@ -224,12 +194,12 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, externa l[name] = extras[i+1] } - s := make([]prompb.Label, 0, len(l)) + labels = labels[:0] for k, v := range l { - s = append(s, prompb.Label{Name: k, Value: v}) + labels = append(labels, prompb.Label{Name: k, Value: v}) } - return s + return labels } // isValidAggregationTemporality checks whether an OTel metric has a valid @@ -249,100 +219,84 @@ func isValidAggregationTemporality(metric pmetric.Metric) bool { return false } -// addSingleHistogramDataPoint converts pt to 2 + min(len(ExplicitBounds), len(BucketCount)) + 1 samples. 
It -// ignore extra buckets if len(ExplicitBounds) > len(BucketCounts) -func addSingleHistogramDataPoint(pt pmetric.HistogramDataPoint, resource pcommon.Resource, metric pmetric.Metric, settings Settings, tsMap map[string]*prompb.TimeSeries, baseName string) { - timestamp := convertTimeStamp(pt.Timestamp()) - baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels) +func (c *PrometheusConverter) addHistogramDataPoints(dataPoints pmetric.HistogramDataPointSlice, + resource pcommon.Resource, settings Settings, baseName string) { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + timestamp := convertTimeStamp(pt.Timestamp()) + baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nil, false) - createLabels := func(nameSuffix string, extras ...string) []prompb.Label { - extraLabelCount := len(extras) / 2 - labels := make([]prompb.Label, len(baseLabels), len(baseLabels)+extraLabelCount+1) // +1 for name - copy(labels, baseLabels) + // If the sum is unset, it indicates the _sum metric point should be + // omitted + if pt.HasSum() { + // treat sum as a sample in an individual TimeSeries + sum := &prompb.Sample{ + Value: pt.Sum(), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + sum.Value = math.Float64frombits(value.StaleNaN) + } + + sumlabels := createLabels(baseName+sumStr, baseLabels) + c.addSample(sum, sumlabels) - for extrasIdx := 0; extrasIdx < extraLabelCount; extrasIdx++ { - labels = append(labels, prompb.Label{Name: extras[extrasIdx], Value: extras[extrasIdx+1]}) } - // sum, count, and buckets of the histogram should append suffix to baseName - labels = append(labels, prompb.Label{Name: model.MetricNameLabel, Value: baseName + nameSuffix}) - - return labels - } - - // If the sum is unset, it indicates the _sum metric point should be - // omitted - if pt.HasSum() { - // treat sum as a sample in an individual TimeSeries - sum := &prompb.Sample{ - Value: pt.Sum(), + // treat count as a sample in an individual TimeSeries + count := &prompb.Sample{ + Value: float64(pt.Count()), Timestamp: timestamp, } if pt.Flags().NoRecordedValue() { - sum.Value = math.Float64frombits(value.StaleNaN) + count.Value = math.Float64frombits(value.StaleNaN) } - sumlabels := createLabels(sumStr) - addSample(tsMap, sum, sumlabels, metric.Type().String()) + countlabels := createLabels(baseName+countStr, baseLabels) + c.addSample(count, countlabels) - } + // cumulative count for conversion to cumulative histogram + var cumulativeCount uint64 - // treat count as a sample in an individual TimeSeries - count := &prompb.Sample{ - Value: float64(pt.Count()), - Timestamp: timestamp, - } - if pt.Flags().NoRecordedValue() { - count.Value = math.Float64frombits(value.StaleNaN) - } + var bucketBounds []bucketBoundsData - countlabels := createLabels(countStr) - addSample(tsMap, count, countlabels, metric.Type().String()) + // process each bound, based on histograms proto definition, # of buckets = # of explicit bounds + 1 + for i := 0; i < pt.ExplicitBounds().Len() && i < pt.BucketCounts().Len(); i++ { + bound := pt.ExplicitBounds().At(i) + cumulativeCount += pt.BucketCounts().At(i) + bucket := &prompb.Sample{ + Value: float64(cumulativeCount), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + bucket.Value = math.Float64frombits(value.StaleNaN) + } + boundStr := strconv.FormatFloat(bound, 'f', -1, 64) + labels := createLabels(baseName+bucketStr, baseLabels, leStr, boundStr) + ts := c.addSample(bucket, labels) - // 
cumulative count for conversion to cumulative histogram - var cumulativeCount uint64 - - promExemplars := getPromExemplars[pmetric.HistogramDataPoint](pt) - - var bucketBounds []bucketBoundsData - - // process each bound, based on histograms proto definition, # of buckets = # of explicit bounds + 1 - for i := 0; i < pt.ExplicitBounds().Len() && i < pt.BucketCounts().Len(); i++ { - bound := pt.ExplicitBounds().At(i) - cumulativeCount += pt.BucketCounts().At(i) - bucket := &prompb.Sample{ - Value: float64(cumulativeCount), + bucketBounds = append(bucketBounds, bucketBoundsData{ts: ts, bound: bound}) + } + // add le=+Inf bucket + infBucket := &prompb.Sample{ Timestamp: timestamp, } if pt.Flags().NoRecordedValue() { - bucket.Value = math.Float64frombits(value.StaleNaN) + infBucket.Value = math.Float64frombits(value.StaleNaN) + } else { + infBucket.Value = float64(pt.Count()) } - boundStr := strconv.FormatFloat(bound, 'f', -1, 64) - labels := createLabels(bucketStr, leStr, boundStr) - sig := addSample(tsMap, bucket, labels, metric.Type().String()) + infLabels := createLabels(baseName+bucketStr, baseLabels, leStr, pInfStr) + ts := c.addSample(infBucket, infLabels) - bucketBounds = append(bucketBounds, bucketBoundsData{sig: sig, bound: bound}) - } - // add le=+Inf bucket - infBucket := &prompb.Sample{ - Timestamp: timestamp, - } - if pt.Flags().NoRecordedValue() { - infBucket.Value = math.Float64frombits(value.StaleNaN) - } else { - infBucket.Value = float64(pt.Count()) - } - infLabels := createLabels(bucketStr, leStr, pInfStr) - sig := addSample(tsMap, infBucket, infLabels, metric.Type().String()) + bucketBounds = append(bucketBounds, bucketBoundsData{ts: ts, bound: math.Inf(1)}) + c.addExemplars(pt, bucketBounds) - bucketBounds = append(bucketBounds, bucketBoundsData{sig: sig, bound: math.Inf(1)}) - addExemplars(tsMap, promExemplars, bucketBounds) - - // add _created time series if needed - startTimestamp := pt.StartTimestamp() - if settings.ExportCreatedMetric && startTimestamp != 0 { - labels := createLabels(createdSuffix) - addCreatedTimeSeriesIfNeeded(tsMap, labels, startTimestamp, pt.Timestamp(), metric.Type().String()) + startTimestamp := pt.StartTimestamp() + if settings.ExportCreatedMetric && startTimestamp != 0 { + labels := createLabels(baseName+createdSuffix, baseLabels) + c.addTimeSeriesIfNeeded(labels, startTimestamp, pt.Timestamp()) + } } } @@ -415,162 +369,203 @@ func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp { case pmetric.MetricTypeGauge: dataPoints := metric.Gauge().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } case pmetric.MetricTypeSum: dataPoints := metric.Sum().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } case pmetric.MetricTypeHistogram: dataPoints := metric.Histogram().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } case pmetric.MetricTypeExponentialHistogram: dataPoints := metric.ExponentialHistogram().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = max(ts, dataPoints.At(x).Timestamp()) } case pmetric.MetricTypeSummary: dataPoints := metric.Summary().DataPoints() for x := 0; x < dataPoints.Len(); x++ { - ts = maxTimestamp(ts, dataPoints.At(x).Timestamp()) + ts = 
max(ts, dataPoints.At(x).Timestamp()) } } return ts } -func maxTimestamp(a, b pcommon.Timestamp) pcommon.Timestamp { - if a > b { - return a - } - return b -} +func (c *PrometheusConverter) addSummaryDataPoints(dataPoints pmetric.SummaryDataPointSlice, resource pcommon.Resource, + settings Settings, baseName string) { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + timestamp := convertTimeStamp(pt.Timestamp()) + baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels, nil, false) -// addSingleSummaryDataPoint converts pt to len(QuantileValues) + 2 samples. -func addSingleSummaryDataPoint(pt pmetric.SummaryDataPoint, resource pcommon.Resource, metric pmetric.Metric, settings Settings, - tsMap map[string]*prompb.TimeSeries, baseName string) { - timestamp := convertTimeStamp(pt.Timestamp()) - baseLabels := createAttributes(resource, pt.Attributes(), settings.ExternalLabels) - - createLabels := func(name string, extras ...string) []prompb.Label { - extraLabelCount := len(extras) / 2 - labels := make([]prompb.Label, len(baseLabels), len(baseLabels)+extraLabelCount+1) // +1 for name - copy(labels, baseLabels) - - for extrasIdx := 0; extrasIdx < extraLabelCount; extrasIdx++ { - labels = append(labels, prompb.Label{Name: extras[extrasIdx], Value: extras[extrasIdx+1]}) - } - - labels = append(labels, prompb.Label{Name: model.MetricNameLabel, Value: name}) - - return labels - } - - // treat sum as a sample in an individual TimeSeries - sum := &prompb.Sample{ - Value: pt.Sum(), - Timestamp: timestamp, - } - if pt.Flags().NoRecordedValue() { - sum.Value = math.Float64frombits(value.StaleNaN) - } - // sum and count of the summary should append suffix to baseName - sumlabels := createLabels(baseName + sumStr) - addSample(tsMap, sum, sumlabels, metric.Type().String()) - - // treat count as a sample in an individual TimeSeries - count := &prompb.Sample{ - Value: float64(pt.Count()), - Timestamp: timestamp, - } - if pt.Flags().NoRecordedValue() { - count.Value = math.Float64frombits(value.StaleNaN) - } - countlabels := createLabels(baseName + countStr) - addSample(tsMap, count, countlabels, metric.Type().String()) - - // process each percentile/quantile - for i := 0; i < pt.QuantileValues().Len(); i++ { - qt := pt.QuantileValues().At(i) - quantile := &prompb.Sample{ - Value: qt.Value(), + // treat sum as a sample in an individual TimeSeries + sum := &prompb.Sample{ + Value: pt.Sum(), Timestamp: timestamp, } if pt.Flags().NoRecordedValue() { - quantile.Value = math.Float64frombits(value.StaleNaN) + sum.Value = math.Float64frombits(value.StaleNaN) } - percentileStr := strconv.FormatFloat(qt.Quantile(), 'f', -1, 64) - qtlabels := createLabels(baseName, quantileStr, percentileStr) - addSample(tsMap, quantile, qtlabels, metric.Type().String()) - } + // sum and count of the summary should append suffix to baseName + sumlabels := createLabels(baseName+sumStr, baseLabels) + c.addSample(sum, sumlabels) - // add _created time series if needed - startTimestamp := pt.StartTimestamp() - if settings.ExportCreatedMetric && startTimestamp != 0 { - createdLabels := createLabels(baseName + createdSuffix) - addCreatedTimeSeriesIfNeeded(tsMap, createdLabels, startTimestamp, pt.Timestamp(), metric.Type().String()) + // treat count as a sample in an individual TimeSeries + count := &prompb.Sample{ + Value: float64(pt.Count()), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + count.Value = math.Float64frombits(value.StaleNaN) + } + countlabels := 
createLabels(baseName+countStr, baseLabels) + c.addSample(count, countlabels) + + // process each percentile/quantile + for i := 0; i < pt.QuantileValues().Len(); i++ { + qt := pt.QuantileValues().At(i) + quantile := &prompb.Sample{ + Value: qt.Value(), + Timestamp: timestamp, + } + if pt.Flags().NoRecordedValue() { + quantile.Value = math.Float64frombits(value.StaleNaN) + } + percentileStr := strconv.FormatFloat(qt.Quantile(), 'f', -1, 64) + qtlabels := createLabels(baseName, baseLabels, quantileStr, percentileStr) + c.addSample(quantile, qtlabels) + } + + startTimestamp := pt.StartTimestamp() + if settings.ExportCreatedMetric && startTimestamp != 0 { + createdLabels := createLabels(baseName+createdSuffix, baseLabels) + c.addTimeSeriesIfNeeded(createdLabels, startTimestamp, pt.Timestamp()) + } } } -// addCreatedTimeSeriesIfNeeded adds {name}_created time series with a single -// sample. If the series exists, then new samples won't be added. -func addCreatedTimeSeriesIfNeeded( - series map[string]*prompb.TimeSeries, - labels []prompb.Label, - startTimestamp pcommon.Timestamp, - timestamp pcommon.Timestamp, - metricType string, -) { - sig := timeSeriesSignature(metricType, labels) - if _, ok := series[sig]; !ok { - series[sig] = &prompb.TimeSeries{ - Labels: labels, - Samples: []prompb.Sample{ - { // convert ns to ms - Value: float64(convertTimeStamp(startTimestamp)), - Timestamp: convertTimeStamp(timestamp), - }, +// createLabels returns a copy of baseLabels, adding to it the pair model.MetricNameLabel=name. +// If extras are provided, corresponding label pairs are also added to the returned slice. +// If extras is uneven length, the last (unpaired) extra will be ignored. +func createLabels(name string, baseLabels []prompb.Label, extras ...string) []prompb.Label { + extraLabelCount := len(extras) / 2 + labels := make([]prompb.Label, len(baseLabels), len(baseLabels)+extraLabelCount+1) // +1 for name + copy(labels, baseLabels) + + n := len(extras) + n -= n % 2 + for extrasIdx := 0; extrasIdx < n; extrasIdx += 2 { + labels = append(labels, prompb.Label{Name: extras[extrasIdx], Value: extras[extrasIdx+1]}) + } + + labels = append(labels, prompb.Label{Name: model.MetricNameLabel, Value: name}) + return labels +} + +// getOrCreateTimeSeries returns the time series corresponding to the label set if existent, and false. +// Otherwise it creates a new one and returns that, and true. +func (c *PrometheusConverter) getOrCreateTimeSeries(lbls []prompb.Label) (*prompb.TimeSeries, bool) { + h := timeSeriesSignature(lbls) + ts := c.unique[h] + if ts != nil { + if isSameMetric(ts, lbls) { + // We already have this metric + return ts, false + } + + // Look for a matching conflict + for _, cTS := range c.conflicts[h] { + if isSameMetric(cTS, lbls) { + // We already have this metric + return cTS, false + } + } + + // New conflict + ts = &prompb.TimeSeries{ + Labels: lbls, + } + c.conflicts[h] = append(c.conflicts[h], ts) + return ts, true + } + + // This metric is new + ts = &prompb.TimeSeries{ + Labels: lbls, + } + c.unique[h] = ts + return ts, true +} + +// addTimeSeriesIfNeeded adds a corresponding time series if it doesn't already exist. +// If the time series doesn't already exist, it gets added with startTimestamp for its value and timestamp for its timestamp, +// both converted to milliseconds. 
+func (c *PrometheusConverter) addTimeSeriesIfNeeded(lbls []prompb.Label, startTimestamp pcommon.Timestamp, timestamp pcommon.Timestamp) { + ts, created := c.getOrCreateTimeSeries(lbls) + if created { + ts.Samples = []prompb.Sample{ + { + // convert ns to ms + Value: float64(convertTimeStamp(startTimestamp)), + Timestamp: convertTimeStamp(timestamp), }, } } } -// addResourceTargetInfo converts the resource to the target info metric -func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timestamp pcommon.Timestamp, tsMap map[string]*prompb.TimeSeries) { - if settings.DisableTargetInfo { +// addResourceTargetInfo converts the resource to the target info metric. +func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timestamp pcommon.Timestamp, converter *PrometheusConverter) { + if settings.DisableTargetInfo || timestamp == 0 { return } - // Use resource attributes (other than those used for job+instance) as the - // metric labels for the target info metric - attributes := pcommon.NewMap() - resource.Attributes().CopyTo(attributes) - attributes.RemoveIf(func(k string, _ pcommon.Value) bool { - switch k { - case conventions.AttributeServiceName, conventions.AttributeServiceNamespace, conventions.AttributeServiceInstanceID: - // Remove resource attributes used for job + instance - return true - default: - return false + + attributes := resource.Attributes() + identifyingAttrs := []string{ + conventions.AttributeServiceNamespace, + conventions.AttributeServiceName, + conventions.AttributeServiceInstanceID, + } + nonIdentifyingAttrsCount := attributes.Len() + for _, a := range identifyingAttrs { + _, haveAttr := attributes.Get(a) + if haveAttr { + nonIdentifyingAttrsCount-- } - }) - if attributes.Len() == 0 { + } + if nonIdentifyingAttrsCount == 0 { // If we only have job + instance, then target_info isn't useful, so don't add it. return } - // create parameters for addSample + name := targetMetricName if len(settings.Namespace) > 0 { name = settings.Namespace + "_" + name } - labels := createAttributes(resource, attributes, settings.ExternalLabels, model.MetricNameLabel, name) + + labels := createAttributes(resource, attributes, settings.ExternalLabels, identifyingAttrs, false, model.MetricNameLabel, name) + haveIdentifier := false + for _, l := range labels { + if l.Name == model.JobLabel || l.Name == model.InstanceLabel { + haveIdentifier = true + break + } + } + + if !haveIdentifier { + // We need at least one identifying label to generate target_info. + return + } + sample := &prompb.Sample{ Value: float64(1), // convert ns to ms Timestamp: convertTimeStamp(timestamp), } - addSample(tsMap, sample, labels, infoType) + converter.addSample(sample, labels) } // convertTimeStamp converts OTLP timestamp in ns to timestamp in ms diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go index 14cea32c3..31d343fe4 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go @@ -1,58 +1,59 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/histograms.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( "fmt" "math" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/value" - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/prompb" ) const defaultZeroThreshold = 1e-128 -func addSingleExponentialHistogramDataPoint( - metric string, - pt pmetric.ExponentialHistogramDataPoint, - resource pcommon.Resource, - settings Settings, - series map[string]*prompb.TimeSeries, -) error { - labels := createAttributes( - resource, - pt.Attributes(), - settings.ExternalLabels, - model.MetricNameLabel, - metric, - ) +func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice, + resource pcommon.Resource, settings Settings, baseName string) error { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + lbls := createAttributes( + resource, + pt.Attributes(), + settings.ExternalLabels, + nil, + true, + model.MetricNameLabel, + baseName, + ) + ts, _ := c.getOrCreateTimeSeries(lbls) - sig := timeSeriesSignature( - pmetric.MetricTypeExponentialHistogram.String(), - labels, - ) - ts, ok := series[sig] - if !ok { - ts = &prompb.TimeSeries{ - Labels: labels, + histogram, err := exponentialToNativeHistogram(pt) + if err != nil { + return err } - series[sig] = ts - } + ts.Histograms = append(ts.Histograms, histogram) - histogram, err := exponentialToNativeHistogram(pt) - if err != nil { - return err + exemplars := getPromExemplars[pmetric.ExponentialHistogramDataPoint](pt) + ts.Exemplars = append(ts.Exemplars, exemplars...) } - ts.Histograms = append(ts.Histograms, histogram) - - exemplars := getPromExemplars[pmetric.ExponentialHistogramDataPoint](pt) - ts.Exemplars = append(ts.Exemplars, exemplars...) return nil } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index fb141034a..65dac99c5 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -1,19 +1,31 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/metrics_to_prw.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( "errors" "fmt" + "sort" - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" "go.uber.org/multierr" + "github.com/prometheus/prometheus/prompb" prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) @@ -26,10 +38,21 @@ type Settings struct { SendMetadata bool } -// FromMetrics converts pmetric.Metrics to Prometheus remote write format. -func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*prompb.TimeSeries, errs error) { - tsMap = make(map[string]*prompb.TimeSeries) +// PrometheusConverter converts from OTel write format to Prometheus remote write format. +type PrometheusConverter struct { + unique map[uint64]*prompb.TimeSeries + conflicts map[uint64][]*prompb.TimeSeries +} +func NewPrometheusConverter() *PrometheusConverter { + return &PrometheusConverter{ + unique: map[uint64]*prompb.TimeSeries{}, + conflicts: map[uint64][]*prompb.TimeSeries{}, + } +} + +// FromMetrics converts pmetric.Metrics to Prometheus remote write format. 
+func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (errs error) { resourceMetricsSlice := md.ResourceMetrics() for i := 0; i < resourceMetricsSlice.Len(); i++ { resourceMetrics := resourceMetricsSlice.At(i) @@ -39,13 +62,12 @@ func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*promp // use with the "target" info metric var mostRecentTimestamp pcommon.Timestamp for j := 0; j < scopeMetricsSlice.Len(); j++ { - scopeMetrics := scopeMetricsSlice.At(j) - metricSlice := scopeMetrics.Metrics() + metricSlice := scopeMetricsSlice.At(j).Metrics() // TODO: decide if instrumentation library information should be exported as labels for k := 0; k < metricSlice.Len(); k++ { metric := metricSlice.At(k) - mostRecentTimestamp = maxTimestamp(mostRecentTimestamp, mostRecentTimestampInMetric(metric)) + mostRecentTimestamp = max(mostRecentTimestamp, mostRecentTimestampInMetric(metric)) if !isValidAggregationTemporality(metric) { errs = multierr.Append(errs, fmt.Errorf("invalid temporality and type combination for metric %q", metric.Name())) @@ -54,65 +76,106 @@ func FromMetrics(md pmetric.Metrics, settings Settings) (tsMap map[string]*promp promName := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes) - // handle individual metric based on type + // handle individual metrics based on type //exhaustive:enforce switch metric.Type() { case pmetric.MetricTypeGauge: dataPoints := metric.Gauge().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - addSingleGaugeNumberDataPoint(dataPoints.At(x), resource, metric, settings, tsMap, promName) - } + c.addGaugeNumberDataPoints(dataPoints, resource, settings, promName) case pmetric.MetricTypeSum: dataPoints := metric.Sum().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - addSingleSumNumberDataPoint(dataPoints.At(x), resource, metric, settings, tsMap, promName) - } + c.addSumNumberDataPoints(dataPoints, resource, metric, settings, promName) case pmetric.MetricTypeHistogram: dataPoints := metric.Histogram().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - addSingleHistogramDataPoint(dataPoints.At(x), resource, metric, settings, tsMap, promName) - } + c.addHistogramDataPoints(dataPoints, resource, settings, promName) case pmetric.MetricTypeExponentialHistogram: dataPoints := metric.ExponentialHistogram().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - errs = multierr.Append( - errs, - addSingleExponentialHistogramDataPoint( - promName, - dataPoints.At(x), - resource, - settings, - tsMap, - ), - ) - } + errs = multierr.Append(errs, c.addExponentialHistogramDataPoints( + dataPoints, + resource, + settings, + promName, + )) case pmetric.MetricTypeSummary: dataPoints := metric.Summary().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. 
%s is dropped", metric.Name())) + break } - for x := 0; x < dataPoints.Len(); x++ { - addSingleSummaryDataPoint(dataPoints.At(x), resource, metric, settings, tsMap, promName) - } + c.addSummaryDataPoints(dataPoints, resource, settings, promName) default: errs = multierr.Append(errs, errors.New("unsupported metric type")) } } } - addResourceTargetInfo(resource, settings, mostRecentTimestamp, tsMap) + addResourceTargetInfo(resource, settings, mostRecentTimestamp, c) } return } + +func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool { + if len(ts.Labels) != len(lbls) { + return false + } + for i, l := range ts.Labels { + if l.Name != ts.Labels[i].Name || l.Value != ts.Labels[i].Value { + return false + } + } + return true +} + +// addExemplars adds exemplars for the dataPoint. For each exemplar, if it can find a bucket bound corresponding to its value, +// the exemplar is added to the bucket bound's time series, provided that the time series' has samples. +func (c *PrometheusConverter) addExemplars(dataPoint pmetric.HistogramDataPoint, bucketBounds []bucketBoundsData) { + if len(bucketBounds) == 0 { + return + } + + exemplars := getPromExemplars(dataPoint) + if len(exemplars) == 0 { + return + } + + sort.Sort(byBucketBoundsData(bucketBounds)) + for _, exemplar := range exemplars { + for _, bound := range bucketBounds { + if len(bound.ts.Samples) > 0 && exemplar.Value <= bound.bound { + bound.ts.Exemplars = append(bound.ts.Exemplars, exemplar) + break + } + } + } +} + +// addSample finds a TimeSeries that corresponds to lbls, and adds sample to it. +// If there is no corresponding TimeSeries already, it's created. +// The corresponding TimeSeries is returned. +// If either lbls is nil/empty or sample is nil, nothing is done. +func (c *PrometheusConverter) addSample(sample *prompb.Sample, lbls []prompb.Label) *prompb.TimeSeries { + if sample == nil || len(lbls) == 0 { + // This shouldn't happen + return nil + } + + ts, _ := c.getOrCreateTimeSeries(lbls) + ts.Samples = append(ts.Samples, *sample) + return ts +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go new file mode 100644 index 000000000..37ac67774 --- /dev/null +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go @@ -0,0 +1,134 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/metrics_to_prw_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. 
+ +package prometheusremotewrite + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" + "go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp" +) + +func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) { + for _, resourceAttributeCount := range []int{0, 5, 50} { + b.Run(fmt.Sprintf("resource attribute count: %v", resourceAttributeCount), func(b *testing.B) { + for _, histogramCount := range []int{0, 1000} { + b.Run(fmt.Sprintf("histogram count: %v", histogramCount), func(b *testing.B) { + nonHistogramCounts := []int{0, 1000} + + if resourceAttributeCount == 0 && histogramCount == 0 { + // Don't bother running a scenario where we'll generate no series. + nonHistogramCounts = []int{1000} + } + + for _, nonHistogramCount := range nonHistogramCounts { + b.Run(fmt.Sprintf("non-histogram count: %v", nonHistogramCount), func(b *testing.B) { + for _, labelsPerMetric := range []int{2, 20} { + b.Run(fmt.Sprintf("labels per metric: %v", labelsPerMetric), func(b *testing.B) { + for _, exemplarsPerSeries := range []int{0, 5, 10} { + b.Run(fmt.Sprintf("exemplars per series: %v", exemplarsPerSeries), func(b *testing.B) { + payload := createExportRequest(resourceAttributeCount, histogramCount, nonHistogramCount, labelsPerMetric, exemplarsPerSeries) + + for i := 0; i < b.N; i++ { + converter := NewPrometheusConverter() + require.NoError(b, converter.FromMetrics(payload.Metrics(), Settings{})) + require.NotNil(b, converter.TimeSeries()) + } + }) + } + }) + } + }) + } + }) + } + }) + } +} + +func createExportRequest(resourceAttributeCount, histogramCount, nonHistogramCount, labelsPerMetric, exemplarsPerSeries int) pmetricotlp.ExportRequest { + request := pmetricotlp.NewExportRequest() + + rm := request.Metrics().ResourceMetrics().AppendEmpty() + generateAttributes(rm.Resource().Attributes(), "resource", resourceAttributeCount) + + metrics := rm.ScopeMetrics().AppendEmpty().Metrics() + ts := pcommon.NewTimestampFromTime(time.Now()) + + for i := 1; i <= histogramCount; i++ { + m := metrics.AppendEmpty() + m.SetEmptyHistogram() + m.SetName(fmt.Sprintf("histogram-%v", i)) + m.Histogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + h := m.Histogram().DataPoints().AppendEmpty() + h.SetTimestamp(ts) + + // Set 50 samples, 10 each with values 0.5, 1, 2, 4, and 8 + h.SetCount(50) + h.SetSum(155) + h.BucketCounts().FromRaw([]uint64{10, 10, 10, 10, 10, 0}) + h.ExplicitBounds().FromRaw([]float64{.5, 1, 2, 4, 8, 16}) // Bucket boundaries include the upper limit (ie. 
each sample is on the upper limit of its bucket) + + generateAttributes(h.Attributes(), "series", labelsPerMetric) + generateExemplars(h.Exemplars(), exemplarsPerSeries, ts) + } + + for i := 1; i <= nonHistogramCount; i++ { + m := metrics.AppendEmpty() + m.SetEmptySum() + m.SetName(fmt.Sprintf("sum-%v", i)) + m.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + point := m.Sum().DataPoints().AppendEmpty() + point.SetTimestamp(ts) + point.SetDoubleValue(1.23) + generateAttributes(point.Attributes(), "series", labelsPerMetric) + generateExemplars(point.Exemplars(), exemplarsPerSeries, ts) + } + + for i := 1; i <= nonHistogramCount; i++ { + m := metrics.AppendEmpty() + m.SetEmptyGauge() + m.SetName(fmt.Sprintf("gauge-%v", i)) + point := m.Gauge().DataPoints().AppendEmpty() + point.SetTimestamp(ts) + point.SetDoubleValue(1.23) + generateAttributes(point.Attributes(), "series", labelsPerMetric) + generateExemplars(point.Exemplars(), exemplarsPerSeries, ts) + } + + return request +} + +func generateAttributes(m pcommon.Map, prefix string, count int) { + for i := 1; i <= count; i++ { + m.PutStr(fmt.Sprintf("%v-name-%v", prefix, i), fmt.Sprintf("value-%v", i)) + } +} + +func generateExemplars(exemplars pmetric.ExemplarSlice, count int, ts pcommon.Timestamp) { + for i := 1; i <= count; i++ { + e := exemplars.AppendEmpty() + e.SetTimestamp(ts) + e.SetDoubleValue(2.22) + e.SetSpanID(pcommon.SpanID{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}) + e.SetTraceID(pcommon.TraceID{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}) + } +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go index b5bd8765f..aafebc6c4 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go @@ -1,106 +1,110 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/number_data_points.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. 
-// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( "math" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/value" - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/prompb" ) -// addSingleGaugeNumberDataPoint converts the Gauge metric data point to a -// Prometheus time series with samples and labels. The result is stored in the -// series map. -func addSingleGaugeNumberDataPoint( - pt pmetric.NumberDataPoint, - resource pcommon.Resource, - metric pmetric.Metric, - settings Settings, - series map[string]*prompb.TimeSeries, - name string, -) { - labels := createAttributes( - resource, - pt.Attributes(), - settings.ExternalLabels, - model.MetricNameLabel, - name, - ) - sample := &prompb.Sample{ - // convert ns to ms - Timestamp: convertTimeStamp(pt.Timestamp()), +func (c *PrometheusConverter) addGaugeNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, + resource pcommon.Resource, settings Settings, name string) { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + labels := createAttributes( + resource, + pt.Attributes(), + settings.ExternalLabels, + nil, + true, + model.MetricNameLabel, + name, + ) + sample := &prompb.Sample{ + // convert ns to ms + Timestamp: convertTimeStamp(pt.Timestamp()), + } + switch pt.ValueType() { + case pmetric.NumberDataPointValueTypeInt: + sample.Value = float64(pt.IntValue()) + case pmetric.NumberDataPointValueTypeDouble: + sample.Value = pt.DoubleValue() + } + if pt.Flags().NoRecordedValue() { + sample.Value = math.Float64frombits(value.StaleNaN) + } + c.addSample(sample, labels) } - switch pt.ValueType() { - case pmetric.NumberDataPointValueTypeInt: - sample.Value = float64(pt.IntValue()) - case pmetric.NumberDataPointValueTypeDouble: - sample.Value = pt.DoubleValue() - } - if pt.Flags().NoRecordedValue() { - sample.Value = math.Float64frombits(value.StaleNaN) - } - addSample(series, sample, labels, metric.Type().String()) } -// addSingleSumNumberDataPoint converts the Sum metric data point to a Prometheus -// time series with samples, labels and exemplars. The result is stored in the -// series map. -func addSingleSumNumberDataPoint( - pt pmetric.NumberDataPoint, - resource pcommon.Resource, - metric pmetric.Metric, - settings Settings, - series map[string]*prompb.TimeSeries, - name string, -) { - labels := createAttributes( - resource, - pt.Attributes(), - settings.ExternalLabels, - model.MetricNameLabel, name, - ) - sample := &prompb.Sample{ - // convert ns to ms - Timestamp: convertTimeStamp(pt.Timestamp()), - } - switch pt.ValueType() { - case pmetric.NumberDataPointValueTypeInt: - sample.Value = float64(pt.IntValue()) - case pmetric.NumberDataPointValueTypeDouble: - sample.Value = pt.DoubleValue() - } - if pt.Flags().NoRecordedValue() { - sample.Value = math.Float64frombits(value.StaleNaN) - } - sig := addSample(series, sample, labels, metric.Type().String()) - - if ts := series[sig]; sig != "" && ts != nil { - exemplars := getPromExemplars[pmetric.NumberDataPoint](pt) - ts.Exemplars = append(ts.Exemplars, exemplars...) 
- } - - // add _created time series if needed - if settings.ExportCreatedMetric && metric.Sum().IsMonotonic() { - startTimestamp := pt.StartTimestamp() - if startTimestamp == 0 { - return +func (c *PrometheusConverter) addSumNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, + resource pcommon.Resource, metric pmetric.Metric, settings Settings, name string) { + for x := 0; x < dataPoints.Len(); x++ { + pt := dataPoints.At(x) + lbls := createAttributes( + resource, + pt.Attributes(), + settings.ExternalLabels, + nil, + true, + model.MetricNameLabel, + name, + ) + sample := &prompb.Sample{ + // convert ns to ms + Timestamp: convertTimeStamp(pt.Timestamp()), + } + switch pt.ValueType() { + case pmetric.NumberDataPointValueTypeInt: + sample.Value = float64(pt.IntValue()) + case pmetric.NumberDataPointValueTypeDouble: + sample.Value = pt.DoubleValue() + } + if pt.Flags().NoRecordedValue() { + sample.Value = math.Float64frombits(value.StaleNaN) + } + ts := c.addSample(sample, lbls) + if ts != nil { + exemplars := getPromExemplars[pmetric.NumberDataPoint](pt) + ts.Exemplars = append(ts.Exemplars, exemplars...) } - createdLabels := make([]prompb.Label, len(labels)) - copy(createdLabels, labels) - for i, l := range createdLabels { - if l.Name == model.MetricNameLabel { - createdLabels[i].Value = name + createdSuffix - break + // add created time series if needed + if settings.ExportCreatedMetric && metric.Sum().IsMonotonic() { + startTimestamp := pt.StartTimestamp() + if startTimestamp == 0 { + return } + + createdLabels := make([]prompb.Label, len(lbls)) + copy(createdLabels, lbls) + for i, l := range createdLabels { + if l.Name == model.MetricNameLabel { + createdLabels[i].Value = name + createdSuffix + break + } + } + c.addTimeSeriesIfNeeded(createdLabels, startTimestamp, pt.Timestamp()) } - addCreatedTimeSeriesIfNeeded(series, createdLabels, startTimestamp, pt.Timestamp(), metric.Type().String()) } } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go b/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go index e43797212..ba4870419 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go @@ -1,14 +1,25 @@ -// DO NOT EDIT. COPIED AS-IS. SEE ../README.md +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/otlp_to_openmetrics_metadata.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. 
-// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package prometheusremotewrite // import "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite" +package prometheusremotewrite import ( - "github.com/prometheus/prometheus/prompb" "go.opentelemetry.io/collector/pdata/pmetric" + "github.com/prometheus/prometheus/prompb" prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/timeseries.go b/storage/remote/otlptranslator/prometheusremotewrite/timeseries.go new file mode 100644 index 000000000..fe973761a --- /dev/null +++ b/storage/remote/otlptranslator/prometheusremotewrite/timeseries.go @@ -0,0 +1,41 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: +// https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/95e8f8fdc2a9dc87230406c9a3cf02be4fd68bea/pkg/translator/prometheusremotewrite/metrics_to_prw.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: Copyright The OpenTelemetry Authors. + +package prometheusremotewrite + +import ( + "github.com/prometheus/prometheus/prompb" +) + +// TimeSeries returns a slice of the prompb.TimeSeries that were converted from OTel format. +func (c *PrometheusConverter) TimeSeries() []prompb.TimeSeries { + conflicts := 0 + for _, ts := range c.conflicts { + conflicts += len(ts) + } + allTS := make([]prompb.TimeSeries, 0, len(c.unique)+conflicts) + for _, ts := range c.unique { + allTS = append(allTS, *ts) + } + for _, cTS := range c.conflicts { + for _, ts := range cTS { + allTS = append(allTS, *ts) + } + } + + return allTS +} diff --git a/storage/remote/otlptranslator/update-copy.sh b/storage/remote/otlptranslator/update-copy.sh deleted file mode 100755 index 8aa645e0b..000000000 --- a/storage/remote/otlptranslator/update-copy.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -set -xe - -OTEL_VERSION=v0.95.0 - -git clone https://github.com/open-telemetry/opentelemetry-collector-contrib ./tmp -cd ./tmp -git checkout $OTEL_VERSION -cd .. - -rm -rf ./prometheusremotewrite/* -cp -r ./tmp/pkg/translator/prometheusremotewrite/*.go ./prometheusremotewrite -rm -rf ./prometheusremotewrite/*_test.go - -rm -rf ./prometheus/* -cp -r ./tmp/pkg/translator/prometheus/*.go ./prometheus -rm -rf ./prometheus/*_test.go - -rm -rf ./tmp - -case $(sed --help 2>&1) in - *GNU*) set sed -i;; - *) set sed -i '';; -esac - -"$@" -e 's#github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus#github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus#g' ./prometheusremotewrite/*.go ./prometheus/*.go -"$@" -e '1s#^#// DO NOT EDIT. COPIED AS-IS. 
SEE ../README.md\n\n#g' ./prometheusremotewrite/*.go ./prometheus/*.go diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index e32a3ace0..6121fb6c0 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -468,7 +468,7 @@ func TestReleaseNoninternedString(t *testing.T) { m.StoreSeries([]record.RefSeries{ { Ref: chunks.HeadSeriesRef(i), - Labels: labels.FromStrings("asdf", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("asdf", strconv.Itoa(i)), }, }, 0) m.SeriesReset(1) diff --git a/storage/remote/read_handler_test.go b/storage/remote/read_handler_test.go index e8e0ecb8d..452b29221 100644 --- a/storage/remote/read_handler_test.go +++ b/storage/remote/read_handler_test.go @@ -30,14 +30,14 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/prompb" - "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/teststorage" ) func TestSampledReadEndpoint(t *testing.T) { - store := promql.LoadedStorage(t, ` + store := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar",baz="qux"} 1 `) @@ -132,7 +132,7 @@ func TestSampledReadEndpoint(t *testing.T) { } func BenchmarkStreamReadEndpoint(b *testing.B) { - store := promql.LoadedStorage(b, ` + store := promqltest.LoadedStorage(b, ` load 1m test_metric1{foo="bar1",baz="qux"} 0+100x119 test_metric1{foo="bar2",baz="qux"} 0+100x120 @@ -200,7 +200,7 @@ func TestStreamReadEndpoint(t *testing.T) { // Second with 121 float samples, We expect 1 frame with 2 chunks. // Third with 241 float samples. We expect 1 frame with 2 chunks, and 1 frame with 1 chunk for the same series due to bytes limit. // Fourth with 25 histogram samples. We expect 1 frame with 1 chunk. 
- store := promql.LoadedStorage(t, ` + store := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar1",baz="qux"} 0+100x119 test_metric1{foo="bar2",baz="qux"} 0+100x120 diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index bb6b8423a..ff227292b 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -208,21 +208,15 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } - prwMetricsMap, errs := otlptranslator.FromMetrics(req.Metrics(), otlptranslator.Settings{ + converter := otlptranslator.NewPrometheusConverter() + if err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{ AddMetricSuffixes: true, - }) - if errs != nil { - level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", errs) - } - - prwMetrics := make([]prompb.TimeSeries, 0, len(prwMetricsMap)) - - for _, ts := range prwMetricsMap { - prwMetrics = append(prwMetrics, *ts) + }); err != nil { + level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err) } err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{ - Timeseries: prwMetrics, + Timeseries: converter.TimeSeries(), }) switch { diff --git a/storage/series.go b/storage/series.go index eba11b4d9..70e3d0a19 100644 --- a/storage/series.go +++ b/storage/series.go @@ -55,8 +55,8 @@ func NewListSeries(lset labels.Labels, s []chunks.Sample) *SeriesEntry { } } -// NewListChunkSeriesFromSamples returns chunk series entry that allows to iterate over provided samples. -// NOTE: It uses inefficient chunks encoding implementation, not caring about chunk size. +// NewListChunkSeriesFromSamples returns a chunk series entry that allows to iterate over provided samples. +// NOTE: It uses an inefficient chunks encoding implementation, not caring about chunk size. // Use only for testing. func NewListChunkSeriesFromSamples(lset labels.Labels, samples ...[]chunks.Sample) *ChunkSeriesEntry { chksFromSamples := make([]chunks.Meta, 0, len(samples)) diff --git a/tsdb/agent/series_test.go b/tsdb/agent/series_test.go index ae327d858..bc5a4af5d 100644 --- a/tsdb/agent/series_test.go +++ b/tsdb/agent/series_test.go @@ -14,8 +14,8 @@ package agent import ( - "fmt" "math" + "strconv" "sync" "testing" "time" @@ -53,7 +53,7 @@ func TestNoDeadlock(t *testing.T) { series := &memSeries{ ref: chunks.HeadSeriesRef(i), lset: labels.FromMap(map[string]string{ - "id": fmt.Sprintf("%d", i), + "id": strconv.Itoa(i), }), } stripeSeries.Set(series.lset.Hash(), series) diff --git a/tsdb/block.go b/tsdb/block.go index abd223e4a..83b86a58d 100644 --- a/tsdb/block.go +++ b/tsdb/block.go @@ -77,6 +77,10 @@ type IndexReader interface { // during background garbage collections. Postings(ctx context.Context, name string, values ...string) (index.Postings, error) + // PostingsForLabelMatching returns a sorted iterator over postings having a label with the given name and a value for which match returns true. + // If no postings are found having at least one matching label, an empty iterator is returned. + PostingsForLabelMatching(ctx context.Context, name string, match func(value string) bool) index.Postings + // SortedPostings returns a postings list that is reordered to be sorted // by the label set of the underlying series. 
SortedPostings(index.Postings) index.Postings @@ -518,6 +522,10 @@ func (r blockIndexReader) Postings(ctx context.Context, name string, values ...s return p, nil } +func (r blockIndexReader) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) index.Postings { + return r.ir.PostingsForLabelMatching(ctx, name, match) +} + func (r blockIndexReader) SortedPostings(p index.Postings) index.Postings { return r.ir.SortedPostings(p) } diff --git a/tsdb/block_test.go b/tsdb/block_test.go index 6d15d1838..42acc3c69 100644 --- a/tsdb/block_test.go +++ b/tsdb/block_test.go @@ -36,6 +36,7 @@ import ( "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/chunks" "github.com/prometheus/prometheus/tsdb/fileutil" + "github.com/prometheus/prometheus/tsdb/index" "github.com/prometheus/prometheus/tsdb/wlog" ) @@ -509,6 +510,86 @@ func TestLabelNamesWithMatchers(t *testing.T) { } } +func TestBlockIndexReader_PostingsForLabelMatching(t *testing.T) { + testPostingsForLabelMatching(t, 2, func(t *testing.T, series []labels.Labels) IndexReader { + var seriesEntries []storage.Series + for _, s := range series { + seriesEntries = append(seriesEntries, storage.NewListSeries(s, []chunks.Sample{sample{100, 0, nil, nil}})) + } + + blockDir := createBlock(t, t.TempDir(), seriesEntries) + files, err := sequenceFiles(chunkDir(blockDir)) + require.NoError(t, err) + require.NotEmpty(t, files, "No chunk created.") + + block, err := OpenBlock(nil, blockDir, nil) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, block.Close()) }) + + ir, err := block.Index() + require.NoError(t, err) + return ir + }) +} + +func testPostingsForLabelMatching(t *testing.T, offset storage.SeriesRef, setUp func(*testing.T, []labels.Labels) IndexReader) { + t.Helper() + + ctx := context.Background() + series := []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), + } + ir := setUp(t, series) + t.Cleanup(func() { + require.NoError(t, ir.Close()) + }) + + testCases := []struct { + name string + labelName string + match func(string) bool + exp []storage.SeriesRef + }{ + { + name: "n=1", + labelName: "n", + match: func(val string) bool { + return val == "1" + }, + exp: []storage.SeriesRef{offset + 1, offset + 2, offset + 3}, + }, + { + name: "n=2", + labelName: "n", + match: func(val string) bool { + return val == "2" + }, + exp: []storage.SeriesRef{offset + 4}, + }, + { + name: "missing label", + labelName: "missing", + match: func(val string) bool { + return true + }, + exp: nil, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := ir.PostingsForLabelMatching(ctx, tc.labelName, tc.match) + require.NotNil(t, p) + srs, err := index.ExpandPostings(p) + require.NoError(t, err) + require.Equal(t, tc.exp, srs) + }) + } +} + // createBlock creates a block with given set of series and returns its dir. func createBlock(tb testing.TB, dir string, series []storage.Series) string { blockDir, err := CreateBlock(series, dir, 0, log.NewNopLogger()) diff --git a/tsdb/blockwriter.go b/tsdb/blockwriter.go index 73bc5f1e3..32346d69d 100644 --- a/tsdb/blockwriter.go +++ b/tsdb/blockwriter.go @@ -42,7 +42,7 @@ type BlockWriter struct { // ErrNoSeriesAppended is returned if the series count is zero while flushing blocks. 
var ErrNoSeriesAppended = errors.New("no series appended, aborting") -// NewBlockWriter create a new block writer. +// NewBlockWriter creates a new block writer. // // The returned writer accumulates all the series in the Head block until `Flush` is called. // diff --git a/tsdb/chunkenc/bstream.go b/tsdb/chunkenc/bstream.go index 7b17f4686..8cc59f3ea 100644 --- a/tsdb/chunkenc/bstream.go +++ b/tsdb/chunkenc/bstream.go @@ -52,6 +52,12 @@ type bstream struct { count uint8 // How many right-most bits are available for writing in the current byte (the last byte of the stream). } +// Reset resets b around stream. +func (b *bstream) Reset(stream []byte) { + b.stream = stream + b.count = 0 +} + func (b *bstream) bytes() []byte { return b.stream } diff --git a/tsdb/chunkenc/bstream_test.go b/tsdb/chunkenc/bstream_test.go index 66a54bc8e..8ac45ef0b 100644 --- a/tsdb/chunkenc/bstream_test.go +++ b/tsdb/chunkenc/bstream_test.go @@ -19,6 +19,19 @@ import ( "github.com/stretchr/testify/require" ) +func TestBstream_Reset(t *testing.T) { + bs := bstream{ + stream: []byte("test"), + count: 10, + } + bs.Reset([]byte("was reset")) + + require.Equal(t, bstream{ + stream: []byte("was reset"), + count: 0, + }, bs) +} + func TestBstreamReader(t *testing.T) { // Write to the bit stream. w := bstream{} diff --git a/tsdb/chunkenc/chunk.go b/tsdb/chunkenc/chunk.go index 21c41257b..1421f3b39 100644 --- a/tsdb/chunkenc/chunk.go +++ b/tsdb/chunkenc/chunk.go @@ -87,6 +87,9 @@ type Chunk interface { // There's no strong guarantee that no samples will be appended once // Compact() is called. Implementing this function is optional. Compact() + + // Reset resets the chunk given stream. + Reset(stream []byte) } type Iterable interface { @@ -303,64 +306,47 @@ func NewPool() Pool { } func (p *pool) Get(e Encoding, b []byte) (Chunk, error) { + var c Chunk switch e { case EncXOR: - c := p.xor.Get().(*XORChunk) - c.b.stream = b - c.b.count = 0 - return c, nil + c = p.xor.Get().(*XORChunk) case EncHistogram: - c := p.histogram.Get().(*HistogramChunk) - c.b.stream = b - c.b.count = 0 - return c, nil + c = p.histogram.Get().(*HistogramChunk) case EncFloatHistogram: - c := p.floatHistogram.Get().(*FloatHistogramChunk) - c.b.stream = b - c.b.count = 0 - return c, nil + c = p.floatHistogram.Get().(*FloatHistogramChunk) + default: + return nil, fmt.Errorf("invalid chunk encoding %q", e) } - return nil, fmt.Errorf("invalid chunk encoding %q", e) + + c.Reset(b) + return c, nil } func (p *pool) Put(c Chunk) error { + var sp *sync.Pool + var ok bool switch c.Encoding() { case EncXOR: - xc, ok := c.(*XORChunk) - // This may happen often with wrapped chunks. Nothing we can really do about - // it but returning an error would cause a lot of allocations again. Thus, - // we just skip it. - if !ok { - return nil - } - xc.b.stream = nil - xc.b.count = 0 - p.xor.Put(c) + _, ok = c.(*XORChunk) + sp = &p.xor case EncHistogram: - sh, ok := c.(*HistogramChunk) - // This may happen often with wrapped chunks. Nothing we can really do about - // it but returning an error would cause a lot of allocations again. Thus, - // we just skip it. - if !ok { - return nil - } - sh.b.stream = nil - sh.b.count = 0 - p.histogram.Put(c) + _, ok = c.(*HistogramChunk) + sp = &p.histogram case EncFloatHistogram: - sh, ok := c.(*FloatHistogramChunk) - // This may happen often with wrapped chunks. Nothing we can really do about - // it but returning an error would cause a lot of allocations again. Thus, - // we just skip it. 
- if !ok { - return nil - } - sh.b.stream = nil - sh.b.count = 0 - p.floatHistogram.Put(c) + _, ok = c.(*FloatHistogramChunk) + sp = &p.floatHistogram default: return fmt.Errorf("invalid chunk encoding %q", c.Encoding()) } + if !ok { + // This may happen often with wrapped chunks. Nothing we can really do about + // it but returning an error would cause a lot of allocations again. Thus, + // we just skip it. + return nil + } + + c.Reset(nil) + sp.Put(c) return nil } diff --git a/tsdb/chunkenc/chunk_test.go b/tsdb/chunkenc/chunk_test.go index 9db1bf364..b72492a08 100644 --- a/tsdb/chunkenc/chunk_test.go +++ b/tsdb/chunkenc/chunk_test.go @@ -110,6 +110,96 @@ func testChunk(t *testing.T, c Chunk) { require.Equal(t, ValNone, it3.Seek(exp[len(exp)-1].t+1)) } +func TestPool(t *testing.T) { + p := NewPool() + for _, tc := range []struct { + name string + encoding Encoding + expErr error + }{ + { + name: "xor", + encoding: EncXOR, + }, + { + name: "histogram", + encoding: EncHistogram, + }, + { + name: "float histogram", + encoding: EncFloatHistogram, + }, + { + name: "invalid encoding", + encoding: EncNone, + expErr: fmt.Errorf(`invalid chunk encoding "none"`), + }, + } { + t.Run(tc.name, func(t *testing.T) { + c, err := p.Get(tc.encoding, []byte("test")) + if tc.expErr != nil { + require.EqualError(t, err, tc.expErr.Error()) + return + } + + require.NoError(t, err) + + var b *bstream + switch tc.encoding { + case EncHistogram: + b = &c.(*HistogramChunk).b + case EncFloatHistogram: + b = &c.(*FloatHistogramChunk).b + default: + b = &c.(*XORChunk).b + } + + require.Equal(t, &bstream{ + stream: []byte("test"), + count: 0, + }, b) + + b.count = 1 + require.NoError(t, p.Put(c)) + require.Equal(t, &bstream{ + stream: nil, + count: 0, + }, b) + }) + } + + t.Run("put bad chunk wrapper", func(t *testing.T) { + // When a wrapping chunk poses as an encoding it can't be converted to, Put should skip it. + c := fakeChunk{ + encoding: EncXOR, + t: t, + } + require.NoError(t, p.Put(c)) + }) + t.Run("put invalid encoding", func(t *testing.T) { + c := fakeChunk{ + encoding: EncNone, + t: t, + } + require.EqualError(t, p.Put(c), `invalid chunk encoding "none"`) + }) +} + +type fakeChunk struct { + Chunk + + encoding Encoding + t *testing.T +} + +func (c fakeChunk) Encoding() Encoding { + return c.encoding +} + +func (c fakeChunk) Reset([]byte) { + c.t.Fatal("Reset should not be called") +} + func benchmarkIterator(b *testing.B, newChunk func() Chunk) { const samplesPerChunk = 250 var ( diff --git a/tsdb/chunkenc/float_histogram.go b/tsdb/chunkenc/float_histogram.go index 88d189254..1eed46ca8 100644 --- a/tsdb/chunkenc/float_histogram.go +++ b/tsdb/chunkenc/float_histogram.go @@ -44,6 +44,10 @@ func NewFloatHistogramChunk() *FloatHistogramChunk { return &FloatHistogramChunk{b: bstream{stream: b, count: 0}} } +func (c *FloatHistogramChunk) Reset(stream []byte) { + c.b.Reset(stream) +} + // xorValue holds all the necessary information to encode // and decode XOR encoded float64 values. type xorValue struct { diff --git a/tsdb/chunkenc/histogram.go b/tsdb/chunkenc/histogram.go index cb09eda26..e12aec4dc 100644 --- a/tsdb/chunkenc/histogram.go +++ b/tsdb/chunkenc/histogram.go @@ -45,6 +45,10 @@ func NewHistogramChunk() *HistogramChunk { return &HistogramChunk{b: bstream{stream: b, count: 0}} } +func (c *HistogramChunk) Reset(stream []byte) { + c.b.Reset(stream) +} + // Encoding returns the encoding type. 
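The net effect of moving `Reset` onto the `Chunk` interface is that the pool's `Get` and `Put` collapse into a single type switch plus one shared re-initialization call. A sketch of the intended round trip through the pool, assuming `raw` holds the bytes of an XOR chunk; the helper name is an illustrative assumption, while `Pool`, `Get`, `Put`, and `NumSamples` are the existing chunkenc APIs.

```go
package example // illustrative only, not part of the patch

import "github.com/prometheus/prometheus/tsdb/chunkenc"

// DecodeWithPool borrows a pooled XOR chunk to decode raw chunk bytes.
func DecodeWithPool(pool chunkenc.Pool, raw []byte) (int, error) {
	chk, err := pool.Get(chunkenc.EncXOR, raw) // Get now boils down to chk.Reset(raw).
	if err != nil {
		return 0, err
	}
	n := chk.NumSamples()
	// Put resets the chunk around a nil stream, so the pooled object does not
	// keep raw (possibly mmapped memory) alive, then returns it to its sync.Pool.
	return n, pool.Put(chk)
}
```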
func (c *HistogramChunk) Encoding() Encoding { return EncHistogram diff --git a/tsdb/chunkenc/varbit.go b/tsdb/chunkenc/varbit.go index b43574dcb..574edec48 100644 --- a/tsdb/chunkenc/varbit.go +++ b/tsdb/chunkenc/varbit.go @@ -61,7 +61,7 @@ func putVarbitInt(b *bstream, val int64) { } } -// readVarbitInt reads an int64 encoced with putVarbitInt. +// readVarbitInt reads an int64 encoded with putVarbitInt. func readVarbitInt(b *bstreamReader) (int64, error) { var d byte for i := 0; i < 8; i++ { @@ -166,7 +166,7 @@ func putVarbitUint(b *bstream, val uint64) { } } -// readVarbitUint reads a uint64 encoced with putVarbitUint. +// readVarbitUint reads a uint64 encoded with putVarbitUint. func readVarbitUint(b *bstreamReader) (uint64, error) { var d byte for i := 0; i < 8; i++ { diff --git a/tsdb/chunkenc/xor.go b/tsdb/chunkenc/xor.go index 07b923831..9430de396 100644 --- a/tsdb/chunkenc/xor.go +++ b/tsdb/chunkenc/xor.go @@ -66,6 +66,10 @@ func NewXORChunk() *XORChunk { return &XORChunk{b: bstream{stream: b, count: 0}} } +func (c *XORChunk) Reset(stream []byte) { + c.b.Reset(stream) +} + // Encoding returns the encoding type. func (c *XORChunk) Encoding() Encoding { return EncXOR @@ -171,7 +175,6 @@ func (a *xorAppender) Append(t int64, v float64) { } a.writeVDelta(v) - default: tDelta = uint64(t - a.t) dod := int64(tDelta - a.tDelta) diff --git a/tsdb/chunks/chunks.go b/tsdb/chunks/chunks.go index 0826f6967..e7df0eeed 100644 --- a/tsdb/chunks/chunks.go +++ b/tsdb/chunks/chunks.go @@ -233,7 +233,7 @@ func ChunkMetasToSamples(chunks []Meta) (result []Sample) { // Iterator iterates over the chunks of a single time series. type Iterator interface { // At returns the current meta. - // It depends on implementation if the chunk is populated or not. + // It depends on the implementation whether the chunk is populated or not. At() Meta // Next advances the iterator by one. Next() bool @@ -478,7 +478,7 @@ func (w *Writer) WriteChunks(chks ...Meta) error { // the batch is too large to fit in the current segment. cutNewBatch := (i != 0) && (batchSize+SegmentHeaderSize > w.segmentSize) - // When the segment already has some data than + // If the segment already has some data then // the first batch size calculation should account for that. if firstBatch && w.n > SegmentHeaderSize { cutNewBatch = batchSize+w.n > w.segmentSize @@ -717,7 +717,7 @@ func nextSequenceFile(dir string) (string, int, error) { } // It is not necessary that we find the files in number order, // for example with '1000000' and '200000', '1000000' would come first. - // Though this is a very very race case, we check anyway for the max id. + // Though this is a very very rare case, we check anyway for the max id. if j > i { i = j } diff --git a/tsdb/chunks/head_chunks.go b/tsdb/chunks/head_chunks.go index 087f25fbb..6c8707c57 100644 --- a/tsdb/chunks/head_chunks.go +++ b/tsdb/chunks/head_chunks.go @@ -188,8 +188,8 @@ func (f *chunkPos) bytesToWriteForChunk(chkLen uint64) uint64 { return bytes } -// ChunkDiskMapper is for writing the Head block chunks to the disk -// and access chunks via mmapped file. +// ChunkDiskMapper is for writing the Head block chunks to disk +// and access chunks via mmapped files. type ChunkDiskMapper struct { /// Writer. dir *os.File @@ -231,7 +231,7 @@ type ChunkDiskMapper struct { closed bool } -// mmappedChunkFile provides mmapp access to an entire head chunks file that holds many chunks. +// mmappedChunkFile provides mmap access to an entire head chunks file that holds many chunks. 
type mmappedChunkFile struct { byteSlice ByteSlice maxt int64 // Max timestamp among all of this file's chunks. @@ -240,7 +240,7 @@ type mmappedChunkFile struct { // NewChunkDiskMapper returns a new ChunkDiskMapper against the given directory // using the default head chunk file duration. // NOTE: 'IterateAllChunks' method needs to be called at least once after creating ChunkDiskMapper -// to set the maxt of all the file. +// to set the maxt of all files. func NewChunkDiskMapper(reg prometheus.Registerer, dir string, pool chunkenc.Pool, writeBufferSize, writeQueueSize int) (*ChunkDiskMapper, error) { // Validate write buffer size. if writeBufferSize < MinWriteBufferSize || writeBufferSize > MaxWriteBufferSize { @@ -381,6 +381,33 @@ func listChunkFiles(dir string) (map[int]string, error) { return res, nil } +// HardLinkChunkFiles creates hardlinks for chunk files from src to dst. +// It does nothing if src doesn't exist, and it creates dst if it doesn't already exist. +func HardLinkChunkFiles(src, dst string) error { + _, err := os.Stat(src) + if os.IsNotExist(err) { + return nil + } + if err != nil { + return fmt.Errorf("check source chunks dir: %w", err) + } + if err := os.MkdirAll(dst, 0o777); err != nil { + return fmt.Errorf("set up destination chunks dir: %w", err) + } + files, err := listChunkFiles(src) + if err != nil { + return fmt.Errorf("list chunks: %w", err) + } + for _, filePath := range files { + _, fileName := filepath.Split(filePath) + err := os.Link(filepath.Join(src, fileName), filepath.Join(dst, fileName)) + if err != nil { + return fmt.Errorf("hardlink a chunk: %w", err) + } + } + return nil +} + // repairLastChunkFile deletes the last file if it's empty. // Because we don't fsync when creating these files, we could end // up with an empty file at the end during an abrupt shutdown. @@ -425,7 +452,7 @@ func repairLastChunkFile(files map[int]string) (_ map[int]string, returnErr erro return files, nil } -// WriteChunk writes the chunk to the disk. +// WriteChunk writes the chunk to disk. // The returned chunk ref is the reference from where the chunk encoding starts for the chunk. func (cdm *ChunkDiskMapper) WriteChunk(seriesRef HeadSeriesRef, mint, maxt int64, chk chunkenc.Chunk, isOOO bool, callback func(err error)) (chkRef ChunkDiskMapperRef) { // cdm.evtlPosMtx must be held to serialize the calls to cdm.evtlPos.getNextChunkRef() and the writing of the chunk (either with or without queue). @@ -784,7 +811,7 @@ func (cdm *ChunkDiskMapper) Chunk(ref ChunkDiskMapperRef) (chunkenc.Chunk, error // IterateAllChunks iterates all mmappedChunkFiles (in order of head chunk file name/number) and all the chunks within it // and runs the provided function with information about each chunk. It returns on the first error encountered. // NOTE: This method needs to be called at least once after creating ChunkDiskMapper -// to set the maxt of all the file. +// to set the maxt of all files. func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chunkRef ChunkDiskMapperRef, mint, maxt int64, numSamples uint16, encoding chunkenc.Encoding, isOOO bool) error) (err error) { cdm.writePathMtx.Lock() defer cdm.writePathMtx.Unlock() @@ -904,7 +931,7 @@ func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chu return nil } -// Truncate deletes the head chunk files whose file number is less than given fileNo. +// Truncate deletes the head chunk files with numbers less than the given fileNo.
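`HardLinkChunkFiles` underpins the read-only sandbox introduced in tsdb/db.go further down: a hard link is just a second directory entry for the same inode, so linking is O(1) per file and copies no chunk data, and deleting a file in dst (which is how whole head chunk files get discarded) only removes that directory entry, leaving src intact. A self-contained sketch of that property; paths and contents are illustrative assumptions, the os calls are the standard library.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	dir, err := os.MkdirTemp("", "hardlink-demo")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	src := filepath.Join(dir, "000001")         // stands in for a head chunk file
	dst := filepath.Join(dir, "sandbox_000001") // the sandbox hard link
	if err := os.WriteFile(src, []byte("chunk data"), 0o666); err != nil {
		panic(err)
	}
	if err := os.Link(src, dst); err != nil { // second name, same inode, no copy
		panic(err)
	}
	if err := os.Remove(dst); err != nil { // the sandbox deletes its name...
		panic(err)
	}
	b, err := os.ReadFile(src)         // ...and the original is still intact
	fmt.Println(string(b), err == nil) // chunk data true
}
```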
func (cdm *ChunkDiskMapper) Truncate(fileNo uint32) error { cdm.readPathMtx.RLock() diff --git a/tsdb/chunks/queue_test.go b/tsdb/chunks/queue_test.go index 5756e4585..9f761a5f3 100644 --- a/tsdb/chunks/queue_test.go +++ b/tsdb/chunks/queue_test.go @@ -55,7 +55,7 @@ func (q *writeJobQueue) assertInvariants(t *testing.T) { require.Len(t, s.segment, s.nextWrite) } // Last segment must have at least one element, or we wouldn't have created it. - require.Greater(t, s.nextWrite, 0) + require.Positive(t, s.nextWrite) } require.Equal(t, q.size, totalSize) diff --git a/tsdb/compact.go b/tsdb/compact.go index e09039cf3..c2ae23b2e 100644 --- a/tsdb/compact.go +++ b/tsdb/compact.go @@ -272,7 +272,7 @@ func (c *LeveledCompactor) plan(dms []dirMeta) ([]string, error) { meta := dms[i].meta if meta.MaxTime-meta.MinTime < c.ranges[len(c.ranges)/2] { // If the block is entirely deleted, then we don't care about the block being big enough. - // TODO: This is assuming single tombstone is for distinct series, which might be no true. + // TODO: This is assuming a single tombstone is for a distinct series, which might not be true. if meta.Stats.NumTombstones > 0 && meta.Stats.NumTombstones >= meta.Stats.NumSeries { return []string{dms[i].dir}, nil } @@ -372,7 +372,7 @@ func splitByRange(ds []dirMeta, tr int64) [][]dirMeta { t0 = tr * ((m.MinTime - tr + 1) / tr) } // Skip blocks that don't fall into the range. This can happen via mis-alignment or - // by being the multiple of the intended range. + // by being a multiple of the intended range. if m.MaxTime > t0+tr { i++ continue @@ -395,7 +395,7 @@ func splitByRange(ds []dirMeta, tr int64) [][]dirMeta { return splitDirs } -// CompactBlockMetas merges many block metas into one, combining it's source blocks together +// CompactBlockMetas merges many block metas into one, combining its source blocks together // and adjusting compaction level. Min/Max time of result block meta covers all input blocks. func CompactBlockMetas(uid ulid.ULID, blocks ...*BlockMeta) *BlockMeta { res := &BlockMeta{ @@ -833,7 +833,7 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa chksIter = s.Iterator(chksIter) chks = chks[:0] for chksIter.Next() { - // We are not iterating in streaming way over chunk as + // We are not iterating in a streaming way over chunks as // it's more efficient to do bulk write for index and // chunk file purposes. chks = append(chks, chksIter.At()) @@ -842,7 +842,7 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa return fmt.Errorf("chunk iter: %w", err) } - // Skip the series with all deleted chunks. + // Skip series with all deleted chunks. 
if len(chks) == 0 { continue } diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index b2d2ea6e7..7a353a556 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -22,6 +22,7 @@ import ( "os" "path" "path/filepath" + "strconv" "sync" "testing" "time" @@ -1129,7 +1130,7 @@ func BenchmarkCompactionFromHead(b *testing.B) { for ln := 0; ln < labelNames; ln++ { app := h.Appender(context.Background()) for lv := 0; lv < labelValues; lv++ { - app.Append(0, labels.FromStrings(fmt.Sprintf("%d", ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)), 0, 0) + app.Append(0, labels.FromStrings(strconv.Itoa(ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)), 0, 0) } require.NoError(b, app.Commit()) } @@ -1161,7 +1162,7 @@ func BenchmarkCompactionFromOOOHead(b *testing.B) { for ln := 0; ln < labelNames; ln++ { app := h.Appender(context.Background()) for lv := 0; lv < labelValues; lv++ { - lbls := labels.FromStrings(fmt.Sprintf("%d", ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)) + lbls := labels.FromStrings(strconv.Itoa(ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)) _, err = app.Append(0, lbls, int64(totalSamples), 0) require.NoError(b, err) for ts := 0; ts < totalSamples; ts++ { @@ -1297,7 +1298,7 @@ func TestCancelCompactions(t *testing.T) { // This checks that the `context.Canceled` error is properly checked at all levels: // - tsdb_errors.NewMulti() should have the Is() method implemented for correct checks. // - callers should check with errors.Is() instead of ==. - readOnlyDB, err := OpenDBReadOnly(tmpdirCopy, log.NewNopLogger()) + readOnlyDB, err := OpenDBReadOnly(tmpdirCopy, "", log.NewNopLogger()) require.NoError(t, err) blocks, err := readOnlyDB.Blocks() require.NoError(t, err) diff --git a/tsdb/db.go b/tsdb/db.go index 22292ab16..bca3c9948 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -206,7 +206,7 @@ type DB struct { compactor Compactor blocksToDelete BlocksToDeleteFunc - // Mutex for that must be held when modifying the general block layout or lastGarbageCollectedMmapRef. + // mtx must be held when modifying the general block layout or lastGarbageCollectedMmapRef. mtx sync.RWMutex blocks []*Block @@ -383,26 +383,36 @@ var ErrClosed = errors.New("db already closed") // Current implementation doesn't support concurrency so // all API calls should happen in the same go routine. type DBReadOnly struct { - logger log.Logger - dir string - closers []io.Closer - closed chan struct{} + logger log.Logger + dir string + sandboxDir string + closers []io.Closer + closed chan struct{} } // OpenDBReadOnly opens DB in the given directory for read only operations. 
-func OpenDBReadOnly(dir string, l log.Logger) (*DBReadOnly, error) { +func OpenDBReadOnly(dir, sandboxDirRoot string, l log.Logger) (*DBReadOnly, error) { if _, err := os.Stat(dir); err != nil { return nil, fmt.Errorf("opening the db dir: %w", err) } + if sandboxDirRoot == "" { + sandboxDirRoot = dir + } + sandboxDir, err := os.MkdirTemp(sandboxDirRoot, "tmp_dbro_sandbox") + if err != nil { + return nil, fmt.Errorf("setting up sandbox dir: %w", err) + } + if l == nil { l = log.NewNopLogger() } return &DBReadOnly{ - logger: l, - dir: dir, - closed: make(chan struct{}), + logger: l, + dir: dir, + sandboxDir: sandboxDir, + closed: make(chan struct{}), }, nil } @@ -491,7 +501,14 @@ func (db *DBReadOnly) loadDataAsQueryable(maxt int64) (storage.SampleAndChunkQue } opts := DefaultHeadOptions() - opts.ChunkDirRoot = db.dir + // Hard link the chunk files to a dir in db.sandboxDir in case the Head needs to truncate some of them + // or cut new ones while replaying the WAL. + // See https://github.com/prometheus/prometheus/issues/11618. + err = chunks.HardLinkChunkFiles(mmappedChunksDir(db.dir), mmappedChunksDir(db.sandboxDir)) + if err != nil { + return nil, err + } + opts.ChunkDirRoot = db.sandboxDir head, err := NewHead(nil, db.logger, nil, nil, opts, NewHeadStats()) if err != nil { return nil, err @@ -519,7 +536,7 @@ func (db *DBReadOnly) loadDataAsQueryable(maxt int64) (storage.SampleAndChunkQue } } opts := DefaultHeadOptions() - opts.ChunkDirRoot = db.dir + opts.ChunkDirRoot = db.sandboxDir head, err = NewHead(nil, db.logger, w, wbl, opts, NewHeadStats()) if err != nil { return nil, err @@ -690,8 +707,14 @@ func (db *DBReadOnly) Block(blockID string) (BlockReader, error) { return block, nil } -// Close all block readers. +// Close all block readers and delete the sandbox dir. func (db *DBReadOnly) Close() error { + defer func() { + // Delete the temporary sandbox directory that was created when opening the DB. + if err := os.RemoveAll(db.sandboxDir); err != nil { + level.Error(db.logger).Log("msg", "delete sandbox dir", "err", err) + } + }() select { case <-db.closed: return ErrClosed @@ -1431,7 +1454,7 @@ func (db *DB) reloadBlocks() (err error) { db.metrics.reloads.Inc() }() - // Now that we reload TSDB every minute, there is high chance for race condition with a reload + // Now that we reload TSDB every minute, there is a high chance for a race condition with a reload // triggered by CleanTombstones(). We need to lock the reload to avoid the situation where // a normal reload and CleanTombstones try to delete the same block. db.mtx.Lock() diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 71b2f05ac..5965e5317 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -25,6 +25,7 @@ import ( "os" "path" "path/filepath" + "runtime" "sort" "strconv" "sync" @@ -1065,7 +1066,7 @@ func TestWALSegmentSizeOptions(t *testing.T) { for i := int64(0); i < 155; i++ { app := db.Appender(context.Background()) - ref, err := app.Append(0, labels.FromStrings("wal"+fmt.Sprintf("%d", i), "size"), i, rand.Float64()) + ref, err := app.Append(0, labels.FromStrings("wal"+strconv.Itoa(int(i)), "size"), i, rand.Float64()) require.NoError(t, err) for j := int64(1); j <= 78; j++ { _, err := app.Append(ref, labels.EmptyLabels(), i+j, rand.Float64()) @@ -2494,7 +2495,7 @@ func TestDBReadOnly(t *testing.T) { } // Open a read only db and ensure that the API returns the same result as the normal DB. 
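For callers, the only visible change is the extra `sandboxDirRoot` argument; passing `""` keeps the temporary directory under the data dir. A usage sketch under stated assumptions: the paths are illustrative, while `OpenDBReadOnly`, `Querier`, and `Close` are the `DBReadOnly` API as it stands after this change.

```go
package main

import (
	"fmt"
	"math"
	"os"

	"github.com/go-kit/log"
	"github.com/prometheus/prometheus/tsdb"
)

func main() {
	// "" as the second argument would keep the sandbox under ./data itself;
	// here it is placed on scratch storage instead. Paths are illustrative.
	db, err := tsdb.OpenDBReadOnly("./data", os.TempDir(), log.NewNopLogger())
	if err != nil {
		panic(err)
	}
	defer db.Close() // Close also deletes the sandbox dir.

	q, err := db.Querier(math.MinInt64, math.MaxInt64)
	if err != nil {
		panic(err)
	}
	defer q.Close()
	fmt.Println("querying a live data dir without altering it")
}
```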
- dbReadOnly, err := OpenDBReadOnly(dbDir, logger) + dbReadOnly, err := OpenDBReadOnly(dbDir, "", logger) require.NoError(t, err) defer func() { require.NoError(t, dbReadOnly.Close()) }() @@ -2548,10 +2549,14 @@ func TestDBReadOnly(t *testing.T) { // TestDBReadOnlyClosing ensures that after closing the db // all api methods return an ErrClosed. func TestDBReadOnlyClosing(t *testing.T) { - dbDir := t.TempDir() - db, err := OpenDBReadOnly(dbDir, log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))) + sandboxDir := t.TempDir() + db, err := OpenDBReadOnly(t.TempDir(), sandboxDir, log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))) require.NoError(t, err) + // The sandboxDir was there. + require.DirExists(t, db.sandboxDir) require.NoError(t, db.Close()) + // The sandboxDir was deleted when closing. + require.NoDirExists(t, db.sandboxDir) require.Equal(t, db.Close(), ErrClosed) _, err = db.Blocks() require.Equal(t, err, ErrClosed) @@ -2587,7 +2592,7 @@ func TestDBReadOnly_FlushWAL(t *testing.T) { } // Flush WAL. - db, err := OpenDBReadOnly(dbDir, logger) + db, err := OpenDBReadOnly(dbDir, "", logger) require.NoError(t, err) flush := t.TempDir() @@ -2595,7 +2600,7 @@ func TestDBReadOnly_FlushWAL(t *testing.T) { require.NoError(t, db.Close()) // Reopen the DB from the flushed WAL block. - db, err = OpenDBReadOnly(flush, logger) + db, err = OpenDBReadOnly(flush, "", logger) require.NoError(t, err) defer func() { require.NoError(t, db.Close()) }() blocks, err := db.Blocks() @@ -2624,6 +2629,80 @@ func TestDBReadOnly_FlushWAL(t *testing.T) { require.Equal(t, 1000.0, sum) } +func TestDBReadOnly_Querier_NoAlteration(t *testing.T) { + countChunks := func(dir string) int { + files, err := os.ReadDir(mmappedChunksDir(dir)) + require.NoError(t, err) + return len(files) + } + + dirHash := func(dir string) (hash []byte) { + // Windows requires the DB to be closed: "xxx\lock: The process cannot access the file because it is being used by another process." + // But closing the DB alters the directory in this case (it'll cut a new chunk). + if runtime.GOOS != "windows" { + hash = testutil.DirHash(t, dir) + } + return + } + + spinUpQuerierAndCheck := func(dir, sandboxDir string, chunksCount int) { + dBDirHash := dirHash(dir) + // Bootstrap a RO db from the same dir and set up a querier. + dbReadOnly, err := OpenDBReadOnly(dir, sandboxDir, nil) + require.NoError(t, err) + require.Equal(t, chunksCount, countChunks(dir)) + q, err := dbReadOnly.Querier(math.MinInt, math.MaxInt) + require.NoError(t, err) + require.NoError(t, q.Close()) + require.NoError(t, dbReadOnly.Close()) + // The RO Head doesn't alter RW db chunks_head/. + require.Equal(t, chunksCount, countChunks(dir)) + require.Equal(t, dirHash(dir), dBDirHash) + } + + t.Run("doesn't cut chunks while replaying WAL", func(t *testing.T) { + db := openTestDB(t, nil, nil) + defer func() { + require.NoError(t, db.Close()) + }() + + // Append until the first mmapped head chunk. + for i := 0; i < 121; i++ { + app := db.Appender(context.Background()) + _, err := app.Append(0, labels.FromStrings("foo", "bar"), int64(i), 0) + require.NoError(t, err) + require.NoError(t, app.Commit()) + } + + spinUpQuerierAndCheck(db.dir, t.TempDir(), 0) + + // The RW Head should have no problem cutting its own chunk; + // this also proves that a chunk needed to be cut.
+ require.NotPanics(t, func() { db.ForceHeadMMap() }) + require.Equal(t, 1, countChunks(db.dir)) + }) + + t.Run("doesn't truncate corrupted chunks", func(t *testing.T) { + db := openTestDB(t, nil, nil) + require.NoError(t, db.Close()) + + // Simulate a corrupted chunk: without a header. + _, err := os.Create(path.Join(mmappedChunksDir(db.dir), "000001")) + require.NoError(t, err) + + spinUpQuerierAndCheck(db.dir, t.TempDir(), 1) + + // The RW Head should have no problem truncating its corrupted file: + // this proves that the chunk needed to be truncated. + db, err = Open(db.dir, nil, nil, nil, nil) + defer func() { + require.NoError(t, db.Close()) + }() + require.NoError(t, err) + require.Equal(t, 0, countChunks(db.dir)) + }) +} + func TestDBCannotSeePartialCommits(t *testing.T) { if defaultIsolationDisabled { t.Skip("skipping test since tsdb isolation is disabled") @@ -4495,7 +4574,7 @@ func TestOOOCompaction(t *testing.T) { ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) require.NoError(t, err) require.False(t, created) - require.Greater(t, ms.ooo.oooHeadChunk.chunk.NumSamples(), 0) + require.Positive(t, ms.ooo.oooHeadChunk.chunk.NumSamples()) require.Len(t, ms.ooo.oooMmappedChunks, 14) // 7 original, 7 duplicate. } checkNonEmptyOOOChunk(series1) @@ -4636,7 +4715,7 @@ func TestOOOCompactionWithNormalCompaction(t *testing.T) { ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) require.NoError(t, err) require.False(t, created) - require.Greater(t, ms.ooo.oooHeadChunk.chunk.NumSamples(), 0) + require.Positive(t, ms.ooo.oooHeadChunk.chunk.NumSamples()) } // If the normal Head is not compacted, the OOO head compaction does not take place. @@ -4737,7 +4816,7 @@ func TestOOOCompactionWithDisabledWriteLog(t *testing.T) { ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) require.NoError(t, err) require.False(t, created) - require.Greater(t, ms.ooo.oooHeadChunk.chunk.NumSamples(), 0) + require.Positive(t, ms.ooo.oooHeadChunk.chunk.NumSamples()) } // If the normal Head is not compacted, the OOO head compaction does not take place. @@ -5438,8 +5517,8 @@ func TestWBLAndMmapReplay(t *testing.T) { addedRecs++ require.NoError(t, newWbl.Log(rec)) } - require.Greater(t, markers, 0) - require.Greater(t, addedRecs, 0) + require.Positive(t, markers) + require.Positive(t, addedRecs) require.NoError(t, newWbl.Close()) require.NoError(t, sr.Close()) require.NoError(t, os.RemoveAll(wblDir)) diff --git a/tsdb/docs/format/head_chunks.md b/tsdb/docs/format/head_chunks.md index 813d4008c..5737f4205 100644 --- a/tsdb/docs/format/head_chunks.md +++ b/tsdb/docs/format/head_chunks.md @@ -27,10 +27,10 @@ in-file offset (lower 4 bytes) and segment sequence number (upper 4 bytes). # Chunk -Unlike chunks in the on-disk blocks, here we additionally store series -reference that the chunks belongs to and the mint/maxt of the chunks. This is -because we don't have an index associated with these chunks, hence these meta -information are used while replaying the chunks. +Unlike chunks in the on-disk blocks, here we additionally store the series +reference that each chunk belongs to and the mint/maxt of the chunks. This is +because we don't have an index associated with these chunks, hence this metadata +is used while replaying the chunks. 
``` ┌─────────────────────┬───────────────────────┬───────────────────────┬───────────────────┬───────────────┬──────────────┬────────────────┐ diff --git a/tsdb/docs/format/index.md b/tsdb/docs/format/index.md index 53b77d9ab..e0ef21bd5 100644 --- a/tsdb/docs/format/index.md +++ b/tsdb/docs/format/index.md @@ -40,7 +40,7 @@ Most of the sections described below start with a `len` field. It always specifi ### Symbol Table -The symbol table holds a sorted list of deduplicated strings that occurred in label pairs of the stored series. They can be referenced from subsequent sections and significantly reduce the total index size. +The symbol table holds a sorted list of deduplicated strings that occur in label pairs of the stored series. They can be referenced from subsequent sections and significantly reduce the total index size. The section contains a sequence of the string entries, each prefixed with the string's length in raw bytes. All strings are utf-8 encoded. Strings are referenced by sequential indexing. The strings are sorted in lexicographically ascending order. diff --git a/tsdb/docs/usage.md b/tsdb/docs/usage.md index e70b24813..7bc1ae6c5 100644 --- a/tsdb/docs/usage.md +++ b/tsdb/docs/usage.md @@ -1,6 +1,6 @@ # Usage -TSDB can be - and is - used by other applications such as [Cortex](https://cortexmetrics.io/) and [Thanos](https://thanos.io/). +TSDB can be - and is - used by other applications such as [Cortex](https://cortexmetrics.io/), [Thanos](https://thanos.io/), and [Grafana Mimir](https://grafana.com/oss/mimir/). This directory contains documentation for any developers who wish to work on or with TSDB. For a full example of instantiating a database, adding and querying data, see the [tsdb example in the docs](https://pkg.go.dev/github.com/prometheus/prometheus/tsdb). @@ -18,7 +18,7 @@ A `DB` has the following main components: * [`Head`](https://pkg.go.dev/github.com/prometheus/prometheus/tsdb#DB.Head) * [Blocks (persistent blocks)](https://pkg.go.dev/github.com/prometheus/prometheus/tsdb#DB.Blocks) -The `Head` is responsible for a lot. Here are its main components: +The `Head` is responsible for a lot. Here are its main components: * [WAL](https://pkg.go.dev/github.com/prometheus/prometheus/tsdb/wal#WAL) (Write Ahead Log). * [`stripeSeries`](https://github.com/prometheus/prometheus/blob/411021ada9ab41095923b8d2df9365b632fd40c3/tsdb/head.go#L1292): diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index 3dd784c62..7545ab9a6 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -37,7 +37,7 @@ const ( type CircularExemplarStorage struct { lock sync.RWMutex - exemplars []*circularBufferEntry + exemplars []circularBufferEntry nextIndex int metrics *ExemplarMetrics @@ -111,7 +111,7 @@ func NewExemplarMetrics(reg prometheus.Registerer) *ExemplarMetrics { return &m } -// NewCircularExemplarStorage creates an circular in memory exemplar storage. +// NewCircularExemplarStorage creates a circular in memory exemplar storage. // If we assume the average case 95 bytes per exemplar we can fit 5651272 exemplars in // 1GB of extra memory, accounting for the fact that this is heap allocated space. 
// If len <= 0, then the exemplar storage is essentially a noop storage but can later be @@ -121,7 +121,7 @@ func NewCircularExemplarStorage(length int64, m *ExemplarMetrics) (ExemplarStora length = 0 } c := &CircularExemplarStorage{ - exemplars: make([]*circularBufferEntry, length), + exemplars: make([]circularBufferEntry, length), index: make(map[string]*indexEntry, length/estimatedExemplarsPerSeries), metrics: m, } @@ -214,12 +214,12 @@ func (ce *CircularExemplarStorage) ValidateExemplar(l labels.Labels, e exemplar. // Optimize by moving the lock to be per series (& benchmark it). ce.lock.RLock() defer ce.lock.RUnlock() - return ce.validateExemplar(seriesLabels, e, false) + return ce.validateExemplar(ce.index[string(seriesLabels)], e, false) } // Not thread safe. The appended parameters tells us whether this is an external validation, or internal // as a result of an AddExemplar call, in which case we should update any relevant metrics. -func (ce *CircularExemplarStorage) validateExemplar(key []byte, e exemplar.Exemplar, appended bool) error { +func (ce *CircularExemplarStorage) validateExemplar(idx *indexEntry, e exemplar.Exemplar, appended bool) error { if len(ce.exemplars) == 0 { return storage.ErrExemplarsDisabled } @@ -239,8 +239,7 @@ func (ce *CircularExemplarStorage) validateExemplar(key []byte, e exemplar.Exemp return err } - idx, ok := ce.index[string(key)] - if !ok { + if idx == nil { return nil } @@ -292,7 +291,7 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { oldBuffer := ce.exemplars oldNextIndex := int64(ce.nextIndex) - ce.exemplars = make([]*circularBufferEntry, l) + ce.exemplars = make([]circularBufferEntry, l) ce.index = make(map[string]*indexEntry, l/estimatedExemplarsPerSeries) ce.nextIndex = 0 @@ -311,10 +310,11 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { // This way we don't migrate exemplars that would just be overwritten when migrating later exemplars. startIndex := (oldNextIndex - count + int64(len(oldBuffer))) % int64(len(oldBuffer)) + var buf [1024]byte for i := int64(0); i < count; i++ { idx := (startIndex + i) % int64(len(oldBuffer)) - if entry := oldBuffer[idx]; entry != nil { - ce.migrate(entry) + if oldBuffer[idx].ref != nil { + ce.migrate(&oldBuffer[idx], buf[:]) migrated++ } } @@ -328,9 +328,8 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { // migrate is like AddExemplar but reuses existing structs. Expected to be called in batch and requires // external lock and does not compute metrics. -func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry) { - var buf [1024]byte - seriesLabels := entry.ref.seriesLabels.Bytes(buf[:]) +func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry, buf []byte) { + seriesLabels := entry.ref.seriesLabels.Bytes(buf[:0]) idx, ok := ce.index[string(seriesLabels)] if !ok { @@ -344,7 +343,7 @@ func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry) { idx.newest = ce.nextIndex entry.next = noExemplar - ce.exemplars[ce.nextIndex] = entry + ce.exemplars[ce.nextIndex] = *entry ce.nextIndex = (ce.nextIndex + 1) % len(ce.exemplars) } @@ -362,7 +361,8 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp ce.lock.Lock() defer ce.lock.Unlock() - err := ce.validateExemplar(seriesLabels, e, true) + idx, ok := ce.index[string(seriesLabels)] + err := ce.validateExemplar(idx, e, true) if err != nil { if errors.Is(err, storage.ErrDuplicateExemplar) { // Duplicate exemplar, noop. 
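Storing `circularBufferEntry` values instead of pointers turns the ring into one contiguous allocation and removes a pointer dereference per access; the price is that an empty slot can no longer be detected as a nil entry, so every nil check moves to the entry's `ref` field, as seen throughout this file. A toy reproduction of that sentinel convention; the types below are simplified stand-ins, not the real ones.

```go
package main

import "fmt"

type indexEntry struct{ newest int }

// entry mirrors the shape of circularBufferEntry: value slots in the ring,
// with emptiness signalled by a nil ref.
type entry struct {
	ref *indexEntry
	ts  int64
}

func main() {
	ring := make([]entry, 4) // one allocation for the whole buffer
	ring[1] = entry{ref: &indexEntry{}, ts: 42}

	for i := range ring {
		if ring[i].ref == nil {
			continue // zero-value slot: nothing stored here yet
		}
		fmt.Println("slot", i, "holds ts", ring[i].ts)
	}
}
```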
@@ -371,25 +371,23 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp return err } - _, ok := ce.index[string(seriesLabels)] if !ok { - ce.index[string(seriesLabels)] = &indexEntry{oldest: ce.nextIndex, seriesLabels: l} + idx = &indexEntry{oldest: ce.nextIndex, seriesLabels: l} + ce.index[string(seriesLabels)] = idx } else { - ce.exemplars[ce.index[string(seriesLabels)].newest].next = ce.nextIndex + ce.exemplars[idx.newest].next = ce.nextIndex } - if prev := ce.exemplars[ce.nextIndex]; prev == nil { - ce.exemplars[ce.nextIndex] = &circularBufferEntry{} - } else { + if prev := &ce.exemplars[ce.nextIndex]; prev.ref != nil { // There exists an exemplar already on this ce.nextIndex entry, // drop it, to make place for others. - var buf [1024]byte - prevLabels := prev.ref.seriesLabels.Bytes(buf[:]) if prev.next == noExemplar { // Last item for this series, remove index entry. + var buf [1024]byte + prevLabels := prev.ref.seriesLabels.Bytes(buf[:]) delete(ce.index, string(prevLabels)) } else { - ce.index[string(prevLabels)].oldest = prev.next + prev.ref.oldest = prev.next } } @@ -397,8 +395,8 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp // since this is the first exemplar stored for this series. ce.exemplars[ce.nextIndex].next = noExemplar ce.exemplars[ce.nextIndex].exemplar = e - ce.exemplars[ce.nextIndex].ref = ce.index[string(seriesLabels)] - ce.index[string(seriesLabels)].newest = ce.nextIndex + ce.exemplars[ce.nextIndex].ref = idx + idx.newest = ce.nextIndex ce.nextIndex = (ce.nextIndex + 1) % len(ce.exemplars) @@ -416,15 +414,15 @@ func (ce *CircularExemplarStorage) computeMetrics() { return } - if next := ce.exemplars[ce.nextIndex]; next != nil { + if ce.exemplars[ce.nextIndex].ref != nil { ce.metrics.exemplarsInStorage.Set(float64(len(ce.exemplars))) - ce.metrics.lastExemplarsTs.Set(float64(next.exemplar.Ts) / 1000) + ce.metrics.lastExemplarsTs.Set(float64(ce.exemplars[ce.nextIndex].exemplar.Ts) / 1000) return } // We did not yet fill the buffer. ce.metrics.exemplarsInStorage.Set(float64(ce.nextIndex)) - if ce.exemplars[0] != nil { + if ce.exemplars[0].ref != nil { ce.metrics.lastExemplarsTs.Set(float64(ce.exemplars[0].exemplar.Ts) / 1000) } } @@ -438,7 +436,7 @@ func (ce *CircularExemplarStorage) IterateExemplars(f func(seriesLabels labels.L idx := ce.nextIndex l := len(ce.exemplars) for i := 0; i < l; i, idx = i+1, (idx+1)%l { - if ce.exemplars[idx] == nil { + if ce.exemplars[idx].ref == nil { continue } err := f(ce.exemplars[idx].ref.seriesLabels, ce.exemplars[idx].exemplar) diff --git a/tsdb/exemplar_test.go b/tsdb/exemplar_test.go index 21030e457..7723ec389 100644 --- a/tsdb/exemplar_test.go +++ b/tsdb/exemplar_test.go @@ -415,27 +415,29 @@ func BenchmarkAddExemplar(b *testing.B) { // before adding. 
exLabels := labels.FromStrings("trace_id", "89620921") - for _, n := range []int{10000, 100000, 1000000} { - b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { - for j := 0; j < b.N; j++ { - b.StopTimer() - exs, err := NewCircularExemplarStorage(int64(n), eMetrics) - require.NoError(b, err) - es := exs.(*CircularExemplarStorage) - var l labels.Labels - b.StartTimer() + for _, capacity := range []int{1000, 10000, 100000} { + for _, n := range []int{10000, 100000, 1000000} { + b.Run(fmt.Sprintf("%d/%d", n, capacity), func(b *testing.B) { + for j := 0; j < b.N; j++ { + b.StopTimer() + exs, err := NewCircularExemplarStorage(int64(capacity), eMetrics) + require.NoError(b, err) + es := exs.(*CircularExemplarStorage) + var l labels.Labels + b.StartTimer() - for i := 0; i < n; i++ { - if i%100 == 0 { - l = labels.FromStrings("service", strconv.Itoa(i)) - } - err = es.AddExemplar(l, exemplar.Exemplar{Value: float64(i), Ts: int64(i), Labels: exLabels}) - if err != nil { - require.NoError(b, err) + for i := 0; i < n; i++ { + if i%100 == 0 { + l = labels.FromStrings("service", strconv.Itoa(i)) + } + err = es.AddExemplar(l, exemplar.Exemplar{Value: float64(i), Ts: int64(i), Labels: exLabels}) + if err != nil { + require.NoError(b, err) + } } } - } - }) + }) + } } } @@ -480,8 +482,11 @@ func BenchmarkResizeExemplars(b *testing.B) { require.NoError(b, err) es := exs.(*CircularExemplarStorage) + var l labels.Labels for i := 0; i < int(float64(tc.startSize)*float64(1.5)); i++ { - l := labels.FromStrings("service", strconv.Itoa(i)) + if i%100 == 0 { + l = labels.FromStrings("service", strconv.Itoa(i)) + } err = es.AddExemplar(l, exemplar.Exemplar{Value: float64(i), Ts: int64(i)}) if err != nil { diff --git a/tsdb/head.go b/tsdb/head.go index 8b3d9787c..d5f7144fd 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -310,12 +310,22 @@ func (h *Head) resetInMemoryState() error { return err } + if h.series != nil { + // Reset the existing series to make sure we call the appropriate hooks + // and increment the seriesRemoved metric. + fs := h.series.iterForDeletion(func(_ int, _ uint64, s *memSeries, flushedForCallback map[chunks.HeadSeriesRef]labels.Labels) { + // All series should be flushed. + flushedForCallback[s.ref] = s.lset + }) + h.metrics.seriesRemoved.Add(float64(fs)) + } + + h.series = newStripeSeries(h.opts.StripeSize, h.opts.SeriesCallback) h.iso = newIsolation(h.opts.IsolationDisabled) h.oooIso = newOOOIsolation() - + h.numSeries.Store(0) h.exemplarMetrics = em h.exemplars = es - h.series = newStripeSeries(h.opts.StripeSize, h.opts.SeriesCallback) h.postings = index.NewUnorderedMemPostings() h.tombstones = tombstones.NewMemTombstones() h.deleted = map[chunks.HeadSeriesRef]int{} @@ -1861,11 +1871,10 @@ func newStripeSeries(stripeSize int, seriesCallback SeriesLifecycleCallback) *st // minMmapFile is the min mmap file number seen in the series (in-order and out-of-order) after gc'ing the series.
func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) (_ map[storage.SeriesRef]struct{}, _ int, _, _ int64, minMmapFile int) { var ( - deleted = map[storage.SeriesRef]struct{}{} - rmChunks = 0 - actualMint int64 = math.MaxInt64 - minOOOTime int64 = math.MaxInt64 - deletedFromPrevStripe = 0 + deleted = map[storage.SeriesRef]struct{}{} + rmChunks = 0 + actualMint int64 = math.MaxInt64 + minOOOTime int64 = math.MaxInt64 ) minMmapFile = math.MaxInt32 @@ -1923,27 +1932,7 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) ( deletedForCallback[series.ref] = series.lset } - // Run through all series shard by shard, checking which should be deleted. - for i := 0; i < s.size; i++ { - deletedForCallback := make(map[chunks.HeadSeriesRef]labels.Labels, deletedFromPrevStripe) - s.locks[i].Lock() - - // Delete conflicts first so seriesHashmap.del doesn't move them to the `unique` field, - // after deleting `unique`. - for hash, all := range s.hashes[i].conflicts { - for _, series := range all { - check(i, hash, series, deletedForCallback) - } - } - for hash, series := range s.hashes[i].unique { - check(i, hash, series, deletedForCallback) - } - - s.locks[i].Unlock() - - s.seriesLifecycleCallback.PostDeletion(deletedForCallback) - deletedFromPrevStripe = len(deletedForCallback) - } + s.iterForDeletion(check) if actualMint == math.MaxInt64 { actualMint = mint @@ -1952,6 +1941,35 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) ( return deleted, rmChunks, actualMint, minOOOTime, minMmapFile } +// iterForDeletion iterates through all series, invoking checkDeletedFunc for each. +// checkDeletedFunc takes a map as input and should add to it every deleted series that should be included +// when invoking the PostDeletion hook. The total number of deleted series is returned. +func (s *stripeSeries) iterForDeletion(checkDeletedFunc func(int, uint64, *memSeries, map[chunks.HeadSeriesRef]labels.Labels)) int { + seriesSetFromPrevStripe := 0 + totalDeletedSeries := 0 + // Run through all series shard by shard. + for i := 0; i < s.size; i++ { + seriesSet := make(map[chunks.HeadSeriesRef]labels.Labels, seriesSetFromPrevStripe) + s.locks[i].Lock() + // Iterate conflicts first so checkDeletedFunc doesn't move them to the `unique` field, + // after deleting `unique`. + for hash, all := range s.hashes[i].conflicts { + for _, series := range all { + checkDeletedFunc(i, hash, series, seriesSet) + } + } + + for hash, series := range s.hashes[i].unique { + checkDeletedFunc(i, hash, series, seriesSet) + } + s.locks[i].Unlock() + s.seriesLifecycleCallback.PostDeletion(seriesSet) + totalDeletedSeries += len(seriesSet) + seriesSetFromPrevStripe = len(seriesSet) + } + return totalDeletedSeries +} + func (s *stripeSeries) getByID(id chunks.HeadSeriesRef) *memSeries { i := uint64(id) & uint64(s.size-1)
for i := s.headChunks.len() - 1; i > 0; i-- { chk := s.headChunks.atOffset(i) diff --git a/tsdb/head_read.go b/tsdb/head_read.go index 45bbc81f1..df15abcd5 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -121,6 +121,10 @@ func (h *headIndexReader) Postings(ctx context.Context, name string, values ...s } } +func (h *headIndexReader) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) index.Postings { + return h.head.postings.PostingsForLabelMatching(ctx, name, match) +} + func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { series := make([]*memSeries, 0, 128) diff --git a/tsdb/head_read_test.go b/tsdb/head_read_test.go index de97d70a5..8d835e943 100644 --- a/tsdb/head_read_test.go +++ b/tsdb/head_read_test.go @@ -14,6 +14,7 @@ package tsdb import ( + "context" "fmt" "sync" "testing" @@ -552,3 +553,25 @@ func TestMemSeries_chunk(t *testing.T) { }) } } + +func TestHeadIndexReader_PostingsForLabelMatching(t *testing.T) { + testPostingsForLabelMatching(t, 0, func(t *testing.T, series []labels.Labels) IndexReader { + opts := DefaultHeadOptions() + opts.ChunkRange = 1000 + opts.ChunkDirRoot = t.TempDir() + h, err := NewHead(nil, nil, nil, nil, opts, nil) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, h.Close()) + }) + app := h.Appender(context.Background()) + for _, s := range series { + app.Append(0, s, 0, 0) + } + require.NoError(t, app.Commit()) + + ir, err := h.Index() + require.NoError(t, err) + return ir + }) +} diff --git a/tsdb/head_test.go b/tsdb/head_test.go index d9631b3b9..bb437ab59 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -3383,7 +3383,7 @@ func TestWaitForPendingReadersInTimeRange(t *testing.T) { func TestAppendHistogram(t *testing.T) { l := labels.FromStrings("a", "b") for _, numHistograms := range []int{1, 10, 150, 200, 250, 300} { - t.Run(fmt.Sprintf("%d", numHistograms), func(t *testing.T) { + t.Run(strconv.Itoa(numHistograms), func(t *testing.T) { head, _ := newTestHead(t, 1000, wlog.CompressionNone, false) t.Cleanup(func() { require.NoError(t, head.Close()) @@ -3557,7 +3557,7 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { expMmapChunks = append(expMmapChunks, &cpy) } expHeadChunkSamples := ms.headChunks.chunk.NumSamples() - require.Greater(t, expHeadChunkSamples, 0) + require.Positive(t, expHeadChunkSamples) // Series with mix of histograms and float. s2 := labels.FromStrings("a", "b2") @@ -3692,7 +3692,7 @@ func TestChunkSnapshot(t *testing.T) { e := ex{ seriesLabels: lbls, e: exemplar.Exemplar{ - Labels: labels.FromStrings("trace_id", fmt.Sprintf("%d", rand.Int())), + Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())), Value: rand.Float64(), Ts: ts, }, @@ -4007,6 +4007,9 @@ func TestSnapshotError(t *testing.T) { require.NoError(t, err) f, err := os.OpenFile(path.Join(snapDir, files[0].Name()), os.O_RDWR, 0) require.NoError(t, err) + // Create snapshot backup to be restored on future test cases. + snapshotBackup, err := io.ReadAll(f) + require.NoError(t, err) _, err = f.WriteAt([]byte{0b11111111}, 18) require.NoError(t, err) require.NoError(t, f.Close()) @@ -4021,10 +4024,44 @@ func TestSnapshotError(t *testing.T) { // There should be no series in the memory after snapshot error since WAL was removed. 
require.Equal(t, 1.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal)) + require.Equal(t, uint64(0), head.NumSeries()) require.Nil(t, head.series.getByHash(lbls.Hash(), lbls)) tm, err = head.tombstones.Get(1) require.NoError(t, err) require.Empty(t, tm) + require.NoError(t, head.Close()) + + // Test corruption in the middle of the snapshot. + f, err = os.OpenFile(path.Join(snapDir, files[0].Name()), os.O_RDWR, 0) + require.NoError(t, err) + _, err = f.WriteAt(snapshotBackup, 0) + require.NoError(t, err) + _, err = f.WriteAt([]byte{0b11111111}, 300) + require.NoError(t, err) + require.NoError(t, f.Close()) + + c := &countSeriesLifecycleCallback{} + opts := head.opts + opts.SeriesCallback = c + + w, err = wlog.NewSize(nil, nil, head.wal.Dir(), 32768, wlog.CompressionNone) + require.NoError(t, err) + head, err = NewHead(prometheus.NewRegistry(), nil, w, nil, head.opts, nil) + require.NoError(t, err) + require.NoError(t, head.Init(math.MinInt64)) + + // There should be no series in the memory after snapshot error since WAL was removed. + require.Equal(t, 1.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal)) + require.Nil(t, head.series.getByHash(lbls.Hash(), lbls)) + require.Equal(t, uint64(0), head.NumSeries()) + + // Since the snapshot could replay certain series, we continue invoking the create hooks. + // In such instances, we need to ensure that we also trigger the delete hooks when resetting the memory. + require.Equal(t, int64(2), c.created.Load()) + require.Equal(t, int64(2), c.deleted.Load()) + + require.Equal(t, 2.0, prom_testutil.ToFloat64(head.metrics.seriesRemoved)) + require.Equal(t, 2.0, prom_testutil.ToFloat64(head.metrics.seriesCreated)) } func TestHistogramMetrics(t *testing.T) { @@ -4601,7 +4638,7 @@ func TestChunkSnapshotTakenAfterIncompleteSnapshot(t *testing.T) { require.NoError(t, err) require.NotEqual(t, "", name) require.Equal(t, 0, idx) - require.Greater(t, offset, 0) + require.Positive(t, offset) } // TestWBLReplay checks the replay at a low level. @@ -5032,7 +5069,7 @@ func TestOOOAppendWithNoSeries(t *testing.T) { require.Equal(t, expSamples, ms.headChunks.chunk.NumSamples()) } - newLabels := func(idx int) labels.Labels { return labels.FromStrings("foo", fmt.Sprintf("%d", idx)) } + newLabels := func(idx int) labels.Labels { return labels.FromStrings("foo", strconv.Itoa(idx)) } s1 := newLabels(1) appendSample(s1, 300) // At 300m. @@ -5829,3 +5866,14 @@ func TestHeadCompactableDoesNotCompactEmptyHead(t *testing.T) { require.False(t, head.compactable()) } + +type countSeriesLifecycleCallback struct { + created atomic.Int64 + deleted atomic.Int64 +} + +func (c *countSeriesLifecycleCallback) PreCreation(labels.Labels) error { return nil } +func (c *countSeriesLifecycleCallback) PostCreation(labels.Labels) { c.created.Inc() } +func (c *countSeriesLifecycleCallback) PostDeletion(s map[chunks.HeadSeriesRef]labels.Labels) { + c.deleted.Add(int64(len(s))) +} diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index 076768f4e..41f7dd46b 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -1496,7 +1496,7 @@ Outer: } default: - // This is a record type we don't understand. It is either and old format from earlier versions, + // This is a record type we don't understand. It is either an old format from earlier versions, // or a new format and the code was rolled back to old version. 
loopErr = fmt.Errorf("unsupported snapshot record type 0b%b", rec[0]) break Outer diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 89c2041a7..480e6a8fc 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -51,6 +51,9 @@ const ( indexFilename = "index" seriesByteAlign = 16 + + // checkContextEveryNIterations is used in some tight loops to check if the context is done. + checkContextEveryNIterations = 128 ) type indexWriterSeries struct { @@ -158,7 +161,7 @@ type Writer struct { postingsEncoder PostingsEncoder } -// TOC represents index Table Of Content that states where each section of index starts. +// TOC represents the index Table Of Contents that states where each section of the index starts. type TOC struct { Symbols uint64 Series uint64 @@ -168,7 +171,7 @@ type TOC struct { PostingsTable uint64 } -// NewTOCFromByteSlice return parsed TOC from given index byte slice. +// NewTOCFromByteSlice returns a parsed TOC from the given index byte slice. func NewTOCFromByteSlice(bs ByteSlice) (*TOC, error) { if bs.Len() < indexTOCLen { return nil, encoding.ErrInvalidSize @@ -1536,36 +1539,14 @@ func (r *Reader) LabelValues(ctx context.Context, name string, matchers ...*labe if len(e) == 0 { return nil, nil } + values := make([]string, 0, len(e)*symbolFactor) - - d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsTable), nil) - d.Skip(e[0].off) lastVal := e[len(e)-1].value - - skip := 0 - for d.Err() == nil && ctx.Err() == nil { - if skip == 0 { - // These are always the same number of bytes, - // and it's faster to skip than parse. - skip = d.Len() - d.Uvarint() // Keycount. - d.UvarintBytes() // Label name. - skip -= d.Len() - } else { - d.Skip(skip) - } - s := yoloString(d.UvarintBytes()) // Label value. - values = append(values, s) - if s == lastVal { - break - } - d.Uvarint64() // Offset. - } - if d.Err() != nil { - return nil, fmt.Errorf("get postings offset entry: %w", d.Err()) - } - - return values, ctx.Err() + err := r.traversePostingOffsets(ctx, e[0].off, func(val string, _ uint64) (bool, error) { + values = append(values, val) + return val != lastVal, nil + }) + return values, err } // LabelNamesFor returns all the label names for the series referred to by IDs. @@ -1662,6 +1643,44 @@ func (r *Reader) Series(id storage.SeriesRef, builder *labels.ScratchBuilder, ch return nil } +// traversePostingOffsets traverses r's posting offsets table, starting at off, and calls cb with every label value and postings offset. +// If cb returns false (or an error), the traversing is interrupted. +func (r *Reader) traversePostingOffsets(ctx context.Context, off int, cb func(string, uint64) (bool, error)) error { + // Don't Crc32 the entire postings offset table, this is very slow + // so hope any issues were caught at startup. + d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsTable), nil) + d.Skip(off) + skip := 0 + ctxErr := ctx.Err() + for d.Err() == nil && ctxErr == nil { + if skip == 0 { + // These are always the same number of bytes, + // and it's faster to skip than to parse. + skip = d.Len() + d.Uvarint() // Keycount. + d.UvarintBytes() // Label name. + skip -= d.Len() + } else { + d.Skip(skip) + } + v := yoloString(d.UvarintBytes()) // Label value. + postingsOff := d.Uvarint64() // Offset. 
+ if ok, err := cb(v, postingsOff); err != nil { + return err + } else if !ok { + break + } + ctxErr = ctx.Err() + } + if d.Err() != nil { + return fmt.Errorf("get postings offset entry: %w", d.Err()) + } + if ctxErr != nil { + return fmt.Errorf("get postings offset entry: %w", ctxErr) + } + return nil +} + func (r *Reader) Postings(ctx context.Context, name string, values ...string) (Postings, error) { if r.version == FormatV1 { e, ok := r.postingsV1[name] @@ -1696,7 +1715,6 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P slices.Sort(values) // Values must be in order so we can step through the table on disk. res := make([]Postings, 0, len(values)) - skip := 0 valueIndex := 0 for valueIndex < len(values) && values[valueIndex] < e[0].value { // Discard values before the start. @@ -1714,33 +1732,15 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P // Need to look from previous entry. i-- } - // Don't Crc32 the entire postings offset table, this is very slow - // so hope any issues were caught at startup. - d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsTable), nil) - d.Skip(e[i].off) - // Iterate on the offset table. - var postingsOff uint64 // The offset into the postings table. - for d.Err() == nil && ctx.Err() == nil { - if skip == 0 { - // These are always the same number of bytes, - // and it's faster to skip than parse. - skip = d.Len() - d.Uvarint() // Keycount. - d.UvarintBytes() // Label name. - skip -= d.Len() - } else { - d.Skip(skip) - } - v := d.UvarintBytes() // Label value. - postingsOff = d.Uvarint64() // Offset. - for string(v) >= value { - if string(v) == value { + if err := r.traversePostingOffsets(ctx, e[i].off, func(val string, postingsOff uint64) (bool, error) { + for val >= value { + if val == value { // Read from the postings table. d2 := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) _, p, err := r.dec.Postings(d2.Get()) if err != nil { - return nil, fmt.Errorf("decode postings: %w", err) + return false, fmt.Errorf("decode postings: %w", err) } res = append(res, p) } @@ -1752,20 +1752,77 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P } if i+1 == len(e) || value >= e[i+1].value || valueIndex == len(values) { // Need to go to a later postings offset entry, if there is one. - break + return false, nil } - } - if d.Err() != nil { - return nil, fmt.Errorf("get postings offset entry: %w", d.Err()) - } - if ctx.Err() != nil { - return nil, fmt.Errorf("get postings offset entry: %w", ctx.Err()) + return true, nil + }); err != nil { + return nil, err } } return Merge(ctx, res...), nil } +func (r *Reader) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) Postings { + if r.version == FormatV1 { + return r.postingsForLabelMatchingV1(ctx, name, match) + } + + e := r.postings[name] + if len(e) == 0 { + return EmptyPostings() + } + + lastVal := e[len(e)-1].value + var its []Postings + if err := r.traversePostingOffsets(ctx, e[0].off, func(val string, postingsOff uint64) (bool, error) { + if match(val) { + // We want this postings iterator since the value is a match + postingsDec := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) + _, p, err := r.dec.PostingsFromDecbuf(postingsDec) + if err != nil { + return false, fmt.Errorf("decode postings: %w", err) + } + its = append(its, p) + } + return val != lastVal, nil + }); err != nil { + return ErrPostings(err) + } + + return Merge(ctx, its...) 
+} + +func (r *Reader) postingsForLabelMatchingV1(ctx context.Context, name string, match func(string) bool) Postings { + e := r.postingsV1[name] + if len(e) == 0 { + return EmptyPostings() + } + + var its []Postings + count := 1 + for val, offset := range e { + if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + return ErrPostings(ctx.Err()) + } + count++ + if !match(val) { + continue + } + + // Read from the postings table. + d := encoding.NewDecbufAt(r.b, int(offset), castagnoliTable) + _, p, err := r.dec.PostingsFromDecbuf(d) + if err != nil { + return ErrPostings(fmt.Errorf("decode postings: %w", err)) + } + + its = append(its, p) + } + + return Merge(ctx, its...) +} + // SortedPostings returns the given postings list reordered so that the backing series // are sorted. func (r *Reader) SortedPostings(p Postings) Postings { @@ -1856,6 +1913,11 @@ type Decoder struct { // Postings returns a postings list for b and its number of elements. func (dec *Decoder) Postings(b []byte) (int, Postings, error) { d := encoding.Decbuf{B: b} + return dec.PostingsFromDecbuf(d) +} + +// PostingsFromDecbuf returns a postings list for d and its number of elements. +func (dec *Decoder) PostingsFromDecbuf(d encoding.Decbuf) (int, Postings, error) { n := d.Be32int() l := d.Get() if d.Err() != nil { diff --git a/tsdb/index/index_test.go b/tsdb/index/index_test.go index c451c38dd..5c6d64e07 100644 --- a/tsdb/index/index_test.go +++ b/tsdb/index/index_test.go @@ -20,7 +20,9 @@ import ( "hash/crc32" "os" "path/filepath" + "slices" "sort" + "strconv" "testing" "github.com/stretchr/testify/require" @@ -160,39 +162,14 @@ func TestIndexRW_Create_Open(t *testing.T) { } func TestIndexRW_Postings(t *testing.T) { - dir := t.TempDir() ctx := context.Background() - - fn := filepath.Join(dir, indexFilename) - - iw, err := NewWriter(context.Background(), fn) - require.NoError(t, err) - - series := []labels.Labels{ - labels.FromStrings("a", "1", "b", "1"), - labels.FromStrings("a", "1", "b", "2"), - labels.FromStrings("a", "1", "b", "3"), - labels.FromStrings("a", "1", "b", "4"), + var input indexWriterSeriesSlice + for i := 1; i < 5; i++ { + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("a", "1", "b", strconv.Itoa(i)), + }) } - - require.NoError(t, iw.AddSymbol("1")) - require.NoError(t, iw.AddSymbol("2")) - require.NoError(t, iw.AddSymbol("3")) - require.NoError(t, iw.AddSymbol("4")) - require.NoError(t, iw.AddSymbol("a")) - require.NoError(t, iw.AddSymbol("b")) - - // Postings lists are only written if a series with the respective - // reference was added before. 
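`postingsForLabelMatchingV1` iterates a plain Go map, so it polls `ctx.Err()` only once every `checkContextEveryNIterations` (128) keys rather than on every iteration, since `Err` is not free on cancellable contexts. The pattern in isolation, mirroring the patch's count-from-one loop; the surrounding function and data are illustrative stand-ins.

```go
package main

import (
	"context"
	"errors"
	"fmt"
)

const checkContextEveryNIterations = 128

// sumUntilCancelled shows the tight-loop shape used above: poll ctx.Err()
// only every N iterations so the common path stays branch-cheap.
func sumUntilCancelled(ctx context.Context, xs []int) (int, error) {
	sum := 0
	count := 1
	for _, x := range xs {
		if count%checkContextEveryNIterations == 0 && ctx.Err() != nil {
			return 0, ctx.Err()
		}
		count++
		sum += x
	}
	return sum, nil
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // cancel up front; the loop notices at iteration 128
	_, err := sumUntilCancelled(ctx, make([]int, 1000))
	fmt.Println(errors.Is(err, context.Canceled)) // true
}
```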
- require.NoError(t, iw.AddSeries(1, series[0])) - require.NoError(t, iw.AddSeries(2, series[1])) - require.NoError(t, iw.AddSeries(3, series[2])) - require.NoError(t, iw.AddSeries(4, series[3])) - - require.NoError(t, iw.Close()) - - ir, err := NewFileReader(fn) - require.NoError(t, err) + ir, fn, _ := createFileReader(ctx, t, input) p, err := ir.Postings(ctx, "a", "1") require.NoError(t, err) @@ -205,7 +182,7 @@ func TestIndexRW_Postings(t *testing.T) { require.NoError(t, err) require.Empty(t, c) - testutil.RequireEqual(t, series[i], builder.Labels()) + testutil.RequireEqual(t, input[i].labels, builder.Labels()) } require.NoError(t, p.Err()) @@ -240,8 +217,6 @@ func TestIndexRW_Postings(t *testing.T) { "b": {"1", "2", "3", "4"}, }, labelIndices) - require.NoError(t, ir.Close()) - t.Run("ShardedPostings()", func(t *testing.T) { ir, err := NewFileReader(fn) require.NoError(t, err) @@ -296,42 +271,16 @@ func TestIndexRW_Postings(t *testing.T) { } func TestPostingsMany(t *testing.T) { - dir := t.TempDir() ctx := context.Background() - - fn := filepath.Join(dir, indexFilename) - - iw, err := NewWriter(context.Background(), fn) - require.NoError(t, err) - // Create a label in the index which has 999 values. - symbols := map[string]struct{}{} - series := []labels.Labels{} + var input indexWriterSeriesSlice for i := 1; i < 1000; i++ { v := fmt.Sprintf("%03d", i) - series = append(series, labels.FromStrings("i", v, "foo", "bar")) - symbols[v] = struct{}{} + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("i", v, "foo", "bar"), + }) } - symbols["i"] = struct{}{} - symbols["foo"] = struct{}{} - symbols["bar"] = struct{}{} - syms := []string{} - for s := range symbols { - syms = append(syms, s) - } - sort.Strings(syms) - for _, s := range syms { - require.NoError(t, iw.AddSymbol(s)) - } - - for i, s := range series { - require.NoError(t, iw.AddSeries(storage.SeriesRef(i), s)) - } - require.NoError(t, iw.Close()) - - ir, err := NewFileReader(fn) - require.NoError(t, err) - defer func() { require.NoError(t, ir.Close()) }() + ir, _, symbols := createFileReader(ctx, t, input) cases := []struct { in []string @@ -387,25 +336,13 @@ func TestPostingsMany(t *testing.T) { } func TestPersistence_index_e2e(t *testing.T) { - dir := t.TempDir() ctx := context.Background() - lbls, err := labels.ReadLabels(filepath.Join("..", "testdata", "20kseries.json"), 20000) require.NoError(t, err) - // Sort labels as the index writer expects series in sorted order. sort.Sort(labels.Slice(lbls)) - symbols := map[string]struct{}{} - for _, lset := range lbls { - lset.Range(func(l labels.Label) { - symbols[l.Name] = struct{}{} - symbols[l.Value] = struct{}{} - }) - } - var input indexWriterSeriesSlice - ref := uint64(0) // Generate ChunkMetas for every label set. for i, lset := range lbls { @@ -426,17 +363,7 @@ func TestPersistence_index_e2e(t *testing.T) { }) } - iw, err := NewWriter(context.Background(), filepath.Join(dir, indexFilename)) - require.NoError(t, err) - - syms := []string{} - for s := range symbols { - syms = append(syms, s) - } - sort.Strings(syms) - for _, s := range syms { - require.NoError(t, iw.AddSymbol(s)) - } + ir, _, _ := createFileReader(ctx, t, input) // Population procedure as done by compaction. var ( @@ -447,8 +374,6 @@ func TestPersistence_index_e2e(t *testing.T) { mi := newMockIndex() for i, s := range input { - err = iw.AddSeries(storage.SeriesRef(i), s.labels, s.chunks...) 
- require.NoError(t, err) require.NoError(t, mi.AddSeries(storage.SeriesRef(i), s.labels, s.chunks...)) s.labels.Range(func(l labels.Label) { @@ -462,12 +387,6 @@ func TestPersistence_index_e2e(t *testing.T) { postings.Add(storage.SeriesRef(i), s.labels) } - err = iw.Close() - require.NoError(t, err) - - ir, err := NewFileReader(filepath.Join(dir, indexFilename)) - require.NoError(t, err) - for p := range mi.postings { gotp, err := ir.Postings(ctx, p.Name, p.Value) require.NoError(t, err) @@ -523,8 +442,6 @@ func TestPersistence_index_e2e(t *testing.T) { } sort.Strings(expSymbols) require.Equal(t, expSymbols, gotSymbols) - - require.NoError(t, ir.Close()) } func TestWriter_ShouldReturnErrorOnSeriesWithDuplicatedLabelNames(t *testing.T) { @@ -624,39 +541,14 @@ func BenchmarkReader_ShardedPostings(b *testing.B) { numShards = 16 ) - dir, err := os.MkdirTemp("", "benchmark_reader_sharded_postings") - require.NoError(b, err) - defer func() { - require.NoError(b, os.RemoveAll(dir)) - }() - ctx := context.Background() - - // Generate an index. - fn := filepath.Join(dir, indexFilename) - - iw, err := NewWriter(ctx, fn) - require.NoError(b, err) - + var input indexWriterSeriesSlice for i := 1; i <= numSeries; i++ { - require.NoError(b, iw.AddSymbol(fmt.Sprintf("%10d", i))) + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("const", fmt.Sprintf("%10d", 1), "unique", fmt.Sprintf("%10d", i)), + }) } - require.NoError(b, iw.AddSymbol("const")) - require.NoError(b, iw.AddSymbol("unique")) - - for i := 1; i <= numSeries; i++ { - require.NoError(b, iw.AddSeries(storage.SeriesRef(i), - labels.FromStrings("const", fmt.Sprintf("%10d", 1), "unique", fmt.Sprintf("%10d", i)))) - } - - require.NoError(b, iw.Close()) - - b.ResetTimer() - - // Create a reader to read back all postings from the index. - ir, err := NewFileReader(fn) - require.NoError(b, err) - + ir, _, _ := createFileReader(ctx, b, input) b.ResetTimer() for n := 0; n < b.N; n++ { @@ -719,3 +611,64 @@ func TestChunksTimeOrdering(t *testing.T) { require.NoError(t, idx.Close()) } + +func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { + const seriesCount = 1000 + var input indexWriterSeriesSlice + for i := 1; i < seriesCount; i++ { + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("__name__", fmt.Sprintf("%4d", i)), + chunks: []chunks.Meta{ + {Ref: 1, MinTime: 0, MaxTime: 10}, + }, + }) + } + ir, _, _ := createFileReader(context.Background(), t, input) + + failAfter := uint64(seriesCount / 2) // Fail after processing half of the series. + ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} + p := ir.PostingsForLabelMatching(ctx, "__name__", func(string) bool { + return true + }) + require.Error(t, p.Err()) + require.Equal(t, failAfter, ctx.Count()) +} + +// createFileReader creates a temporary index file. It writes the provided input to this file. +// It returns a Reader for this file, the file's name, and the symbol map. 
+func createFileReader(ctx context.Context, tb testing.TB, input indexWriterSeriesSlice) (*Reader, string, map[string]struct{}) { + tb.Helper() + + fn := filepath.Join(tb.TempDir(), indexFilename) + + iw, err := NewWriter(ctx, fn) + require.NoError(tb, err) + + symbols := map[string]struct{}{} + for _, s := range input { + s.labels.Range(func(l labels.Label) { + symbols[l.Name] = struct{}{} + symbols[l.Value] = struct{}{} + }) + } + + syms := []string{} + for s := range symbols { + syms = append(syms, s) + } + slices.Sort(syms) + for _, s := range syms { + require.NoError(tb, iw.AddSymbol(s)) + } + for i, s := range input { + require.NoError(tb, iw.AddSeries(storage.SeriesRef(i), s.labels, s.chunks...)) + } + require.NoError(tb, iw.Close()) + + ir, err := NewFileReader(fn) + require.NoError(tb, err) + tb.Cleanup(func() { + require.NoError(tb, ir.Close()) + }) + return ir, fn, symbols +} diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index 61a5560ee..159f6416e 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -397,6 +397,41 @@ func (p *MemPostings) addFor(id storage.SeriesRef, l labels.Label) { } } +func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) Postings { + p.mtx.RLock() + + e := p.m[name] + if len(e) == 0 { + p.mtx.RUnlock() + return EmptyPostings() + } + + // Benchmarking shows that first copying the values into a slice and then matching over that is + // faster than matching over the map keys directly, at least on AMD64. + vals := make([]string, 0, len(e)) + for v, srs := range e { + if len(srs) > 0 { + vals = append(vals, v) + } + } + + var its []Postings + count := 1 + for _, v := range vals { + if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + p.mtx.RUnlock() + return ErrPostings(ctx.Err()) + } + count++ + if match(v) { + its = append(its, NewListPostings(e[v])) + } + } + p.mtx.RUnlock() + + return Merge(ctx, its...) +} + // ExpandPostings returns the postings expanded as a slice. func ExpandPostings(p Postings) (res []storage.SeriesRef, err error) { for p.Next() { diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index 9e6bd23f8..2cbc14ac6 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -22,12 +22,15 @@ import ( "math/rand" "sort" "strconv" + "strings" "testing" + "github.com/grafana/regexp" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/testutil" ) func TestMemPostings_addFor(t *testing.T) { @@ -49,7 +52,7 @@ func TestMemPostings_ensureOrder(t *testing.T) { for j := range l { l[j] = storage.SeriesRef(rand.Uint64()) } - v := fmt.Sprintf("%d", i) + v := strconv.Itoa(i) p.m["a"][v] = l } @@ -390,7 +393,7 @@ func BenchmarkMerge(t *testing.B) { its := make([]Postings, len(refs)) for _, nSeries := range []int{1, 10, 100, 1000, 10000, 100000} { - t.Run(fmt.Sprint(nSeries), func(bench *testing.B) { + t.Run(strconv.Itoa(nSeries), func(bench *testing.B) { ctx := context.Background() for i := 0; i < bench.N; i++ { // Reset the ListPostings to their original values each time round the loop. 
@@ -1282,3 +1285,71 @@ func BenchmarkListPostings(b *testing.B) { }) } } + +func slowRegexpString() string { + nums := map[int]struct{}{} + for i := 10_000; i < 20_000; i++ { + if i%3 == 0 { + nums[i] = struct{}{} + } + } + + var sb strings.Builder + sb.WriteString(".*(9999") + for i := range nums { + sb.WriteString("|") + sb.WriteString(strconv.Itoa(i)) + } + sb.WriteString(").*") + return sb.String() +} + +func BenchmarkMemPostings_PostingsForLabelMatching(b *testing.B) { + fast := regexp.MustCompile("^(100|200)$") + slowRegexp := "^" + slowRegexpString() + "$" + b.Logf("Slow regexp length = %d", len(slowRegexp)) + slow := regexp.MustCompile(slowRegexp) + + for _, labelValueCount := range []int{1_000, 10_000, 100_000} { + b.Run(fmt.Sprintf("labels=%d", labelValueCount), func(b *testing.B) { + mp := NewMemPostings() + for i := 0; i < labelValueCount; i++ { + mp.Add(storage.SeriesRef(i), labels.FromStrings("label", strconv.Itoa(i))) + } + + fp, err := ExpandPostings(mp.PostingsForLabelMatching(context.Background(), "label", fast.MatchString)) + require.NoError(b, err) + b.Logf("Fast matcher matches %d series", len(fp)) + b.Run("matcher=fast", func(b *testing.B) { + for i := 0; i < b.N; i++ { + mp.PostingsForLabelMatching(context.Background(), "label", fast.MatchString).Next() + } + }) + + sp, err := ExpandPostings(mp.PostingsForLabelMatching(context.Background(), "label", slow.MatchString)) + require.NoError(b, err) + b.Logf("Slow matcher matches %d series", len(sp)) + b.Run("matcher=slow", func(b *testing.B) { + for i := 0; i < b.N; i++ { + mp.PostingsForLabelMatching(context.Background(), "label", slow.MatchString).Next() + } + }) + }) + } +} + +func TestMemPostings_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { + memP := NewMemPostings() + seriesCount := 10 * checkContextEveryNIterations + for i := 1; i <= seriesCount; i++ { + memP.Add(storage.SeriesRef(i), labels.FromStrings("__name__", fmt.Sprintf("%4d", i))) + } + + failAfter := uint64(seriesCount / 2 / checkContextEveryNIterations) + ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} + p := memP.PostingsForLabelMatching(ctx, "__name__", func(string) bool { + return true + }) + require.Error(t, p.Err()) + require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result. +} diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go index ed0b3fd22..af431d678 100644 --- a/tsdb/ooo_head_read.go +++ b/tsdb/ooo_head_read.go @@ -446,6 +446,10 @@ func (ir *OOOCompactionHeadIndexReader) Postings(_ context.Context, name string, return index.NewListPostings(ir.ch.postings), nil } +func (ir *OOOCompactionHeadIndexReader) PostingsForLabelMatching(context.Context, string, func(string) bool) index.Postings { + return index.ErrPostings(errors.New("not supported")) +} + func (ir *OOOCompactionHeadIndexReader) SortedPostings(p index.Postings) index.Postings { // This will already be sorted from the Postings() call above. return p diff --git a/tsdb/querier.go b/tsdb/querier.go index 8ebedfe52..1071c4a71 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -33,6 +33,9 @@ import ( "github.com/prometheus/prometheus/util/annotations" ) +// checkContextEveryNIterations is used in some tight loops to check if the context is done. 
+const checkContextEveryNIterations = 100 + type blockBaseQuerier struct { blockID ulid.ULID index IndexReader @@ -326,23 +329,8 @@ func postingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Matcher) } } - vals, err := ix.LabelValues(ctx, m.Name) - if err != nil { - return nil, err - } - - var res []string - for _, val := range vals { - if m.Matches(val) { - res = append(res, val) - } - } - - if len(res) == 0 { - return index.EmptyPostings(), nil - } - - return ix.Postings(ctx, m.Name, res...) + it := ix.PostingsForLabelMatching(ctx, m.Name, m.Matches) + return it, it.Err() } // inversePostingsForMatcher returns the postings for the series with the label name set but not matching the matcher. @@ -368,12 +356,17 @@ func inversePostingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Ma return nil, err } - var res []string - // If the inverse match is ="", we just want all the values. - if m.Type == labels.MatchEqual && m.Value == "" { + res := vals[:0] + // If the match before inversion was !="" or !~"", we just want all the values. + if m.Value == "" && (m.Type == labels.MatchRegexp || m.Type == labels.MatchEqual) { res = vals } else { + count := 1 for _, val := range vals { + if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + return nil, ctx.Err() + } + count++ if !m.Matches(val) { res = append(res, val) } @@ -402,7 +395,12 @@ func labelValuesWithMatchers(ctx context.Context, r IndexReader, name string, ma // re-use the allValues slice to avoid allocations // this is safe because the iteration is always ahead of the append filteredValues := allValues[:0] + count := 1 for _, v := range allValues { + if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + return nil, ctx.Err() + } + count++ if m.Matches(v) { filteredValues = append(filteredValues, v) } diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index a293a983d..c7e60a0e1 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -38,6 +38,7 @@ import ( "github.com/prometheus/prometheus/tsdb/tombstones" "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/annotations" + "github.com/prometheus/prometheus/util/testutil" ) // TODO(bwplotka): Replace those mocks with remote.concreteSeriesSet. @@ -2326,6 +2327,16 @@ func (m mockIndex) SortedPostings(p index.Postings) index.Postings { return index.NewListPostings(ep) } +func (m mockIndex) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) index.Postings { + var res []index.Postings + for l, srs := range m.postings { + if l.Name == name && match(l.Value) { + res = append(res, index.NewListPostings(srs)) + } + } + return index.Merge(ctx, res...) +} + func (m mockIndex) ShardedPostings(p index.Postings, shardIndex, shardCount uint64) index.Postings { out := make([]storage.SeriesRef, 0, 128) @@ -2797,6 +2808,13 @@ func TestPostingsForMatchers(t *testing.T) { }, }, // Not regex. 
+ { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "i", "")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, { matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "n", "^1$")}, exp: []labels.Labels{ @@ -3238,6 +3256,10 @@ func (m mockMatcherIndex) LabelNames(context.Context, ...*labels.Matcher) ([]str return []string{}, nil } +func (m mockMatcherIndex) PostingsForLabelMatching(context.Context, string, func(string) bool) index.Postings { + return index.ErrPostings(fmt.Errorf("PostingsForLabelMatching called")) +} + func TestPostingsForMatcher(t *testing.T) { ctx := context.Background() @@ -3624,3 +3646,77 @@ func TestQueryWithOneChunkCompletelyDeleted(t *testing.T) { require.NoError(t, css.Err()) require.Equal(t, 1, seriesCount) } + +func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { + ir := mockReaderOfLabels{} + + failAfter := uint64(mockReaderOfLabelsSeriesCount / 2 / checkContextEveryNIterations) + ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} + _, err := labelValuesWithMatchers(ctx, ir, "__name__", labels.MustNewMatcher(labels.MatchRegexp, "__name__", ".+")) + + require.Error(t, err) + require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result. +} + +func TestReader_InversePostingsForMatcherHonorsContextCancel(t *testing.T) { + ir := mockReaderOfLabels{} + + failAfter := uint64(mockReaderOfLabelsSeriesCount / 2 / checkContextEveryNIterations) + ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} + _, err := inversePostingsForMatcher(ctx, ir, labels.MustNewMatcher(labels.MatchRegexp, "__name__", ".*")) + + require.Error(t, err) + require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result. 
+} + +type mockReaderOfLabels struct{} + +const mockReaderOfLabelsSeriesCount = checkContextEveryNIterations * 10 + +func (m mockReaderOfLabels) LabelValues(context.Context, string, ...*labels.Matcher) ([]string, error) { + return make([]string, mockReaderOfLabelsSeriesCount), nil +} + +func (m mockReaderOfLabels) LabelValueFor(context.Context, storage.SeriesRef, string) (string, error) { + panic("LabelValueFor called") +} + +func (m mockReaderOfLabels) SortedLabelValues(context.Context, string, ...*labels.Matcher) ([]string, error) { + panic("SortedLabelValues called") +} + +func (m mockReaderOfLabels) Close() error { + return nil +} + +func (m mockReaderOfLabels) LabelNames(context.Context, ...*labels.Matcher) ([]string, error) { + panic("LabelNames called") +} + +func (m mockReaderOfLabels) LabelNamesFor(context.Context, ...storage.SeriesRef) ([]string, error) { + panic("LabelNamesFor called") +} + +func (m mockReaderOfLabels) PostingsForLabelMatching(context.Context, string, func(string) bool) index.Postings { + panic("PostingsForLabelMatching called") +} + +func (m mockReaderOfLabels) Postings(context.Context, string, ...string) (index.Postings, error) { + panic("Postings called") +} + +func (m mockReaderOfLabels) ShardedPostings(index.Postings, uint64, uint64) index.Postings { + panic("Postings called") +} + +func (m mockReaderOfLabels) SortedPostings(index.Postings) index.Postings { + panic("SortedPostings called") +} + +func (m mockReaderOfLabels) Series(storage.SeriesRef, *labels.ScratchBuilder, *[]chunks.Meta) error { + panic("Series called") +} + +func (m mockReaderOfLabels) Symbols() index.StringIter { + panic("Series called") +} diff --git a/tsdb/record/record.go b/tsdb/record/record.go index 8a8409e55..c95b25f06 100644 --- a/tsdb/record/record.go +++ b/tsdb/record/record.go @@ -163,7 +163,7 @@ type RefMetadata struct { Help string } -// RefExemplar is an exemplar with it's labels, timestamp, value the exemplar was collected/observed with, and a reference to a series. +// RefExemplar is an exemplar with the labels, timestamp, value the exemplar was collected/observed with, and a reference to a series. type RefExemplar struct { Ref chunks.HeadSeriesRef T int64 @@ -798,7 +798,7 @@ func (e *Encoder) FloatHistogramSamples(histograms []RefFloatHistogramSample, b return buf.Get() } -// Encode encodes the Float Histogram into a byte slice. +// EncodeFloatHistogram encodes the Float Histogram into a byte slice. func EncodeFloatHistogram(buf *encoding.Encbuf, h *histogram.FloatHistogram) { buf.PutByte(byte(h.CounterResetHint)) diff --git a/tsdb/wlog/checkpoint_test.go b/tsdb/wlog/checkpoint_test.go index 279f7c435..a9786454d 100644 --- a/tsdb/wlog/checkpoint_test.go +++ b/tsdb/wlog/checkpoint_test.go @@ -19,6 +19,7 @@ import ( "os" "path/filepath" "sort" + "strconv" "strings" "testing" @@ -232,10 +233,10 @@ func TestCheckpoint(t *testing.T) { // Write changing metadata for each series. In the end, only the latest // version should end up in the checkpoint. 
b = enc.Metadata([]record.RefMetadata{ - {Ref: 0, Unit: fmt.Sprintf("%d", last), Help: fmt.Sprintf("%d", last)}, - {Ref: 1, Unit: fmt.Sprintf("%d", last), Help: fmt.Sprintf("%d", last)}, - {Ref: 2, Unit: fmt.Sprintf("%d", last), Help: fmt.Sprintf("%d", last)}, - {Ref: 3, Unit: fmt.Sprintf("%d", last), Help: fmt.Sprintf("%d", last)}, + {Ref: 0, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, + {Ref: 1, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, + {Ref: 2, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, + {Ref: 3, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, }, nil) require.NoError(t, w.Log(b)) @@ -250,7 +251,7 @@ func TestCheckpoint(t *testing.T) { require.NoError(t, w.Truncate(107)) require.NoError(t, DeleteCheckpoints(w.Dir(), 106)) require.Equal(t, histogramsInWAL+floatHistogramsInWAL+samplesInWAL, stats.TotalSamples) - require.Greater(t, stats.DroppedSamples, 0) + require.Positive(t, stats.DroppedSamples) // Only the new checkpoint should be left. files, err := os.ReadDir(dir) @@ -324,8 +325,8 @@ func TestCheckpoint(t *testing.T) { testutil.RequireEqual(t, expectedRefSeries, series) expectedRefMetadata := []record.RefMetadata{ - {Ref: 0, Unit: fmt.Sprintf("%d", last-100), Help: fmt.Sprintf("%d", last-100)}, - {Ref: 2, Unit: fmt.Sprintf("%d", last-100), Help: fmt.Sprintf("%d", last-100)}, + {Ref: 0, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)}, + {Ref: 2, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)}, {Ref: 4, Unit: "unit", Help: "help"}, } sort.Slice(metadata, func(i, j int) bool { return metadata[i].Ref < metadata[j].Ref }) diff --git a/util/almost/almost.go b/util/almost/almost.go new file mode 100644 index 000000000..34f1290a5 --- /dev/null +++ b/util/almost/almost.go @@ -0,0 +1,41 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package almost + +import "math" + +var minNormal = math.Float64frombits(0x0010000000000000) // The smallest positive normal value of type float64. + +// Equal returns true if a and b differ by less than their sum +// multiplied by epsilon. +func Equal(a, b, epsilon float64) bool { + // NaN has no equality but for testing we still want to know whether both values + // are NaN. + if math.IsNaN(a) && math.IsNaN(b) { + return true + } + + // Cf. 
http://floating-point-gui.de/errors/comparison/ + if a == b { + return true + } + + absSum := math.Abs(a) + math.Abs(b) + diff := math.Abs(a - b) + + if a == 0 || b == 0 || absSum < minNormal { + return diff < epsilon*minNormal + } + return diff/math.Min(absSum, math.MaxFloat64) < epsilon +} diff --git a/util/testutil/context.go b/util/testutil/context.go index 3f63b030d..0c9e0f6f6 100644 --- a/util/testutil/context.go +++ b/util/testutil/context.go @@ -13,7 +13,12 @@ package testutil -import "time" +import ( + "context" + "time" + + "go.uber.org/atomic" +) // A MockContext provides a simple stub implementation of a Context. type MockContext struct { @@ -40,3 +45,23 @@ func (c *MockContext) Err() error { func (c *MockContext) Value(interface{}) interface{} { return nil } + +// MockContextErrAfter is a MockContext that will return an error after a certain +// number of calls to Err(). +type MockContextErrAfter struct { + MockContext + count atomic.Uint64 + FailAfter uint64 +} + +func (c *MockContextErrAfter) Err() error { + c.count.Inc() + if c.count.Load() >= c.FailAfter { + return context.Canceled + } + return c.MockContext.Err() +} + +func (c *MockContextErrAfter) Count() uint64 { + return c.count.Load() +} diff --git a/web/api/v1/api.go b/web/api/v1/api.go index dc2236507..f0884926e 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -116,9 +116,11 @@ type RulesRetriever interface { AlertingRules() []*rules.AlertingRule } +// StatsRenderer converts engine statistics into a format suitable for the API. type StatsRenderer func(context.Context, *stats.Statistics, string) stats.QueryStats -func defaultStatsRenderer(_ context.Context, s *stats.Statistics, param string) stats.QueryStats { +// DefaultStatsRenderer is the default stats renderer for the API. +func DefaultStatsRenderer(_ context.Context, s *stats.Statistics, param string) stats.QueryStats { if param != "" { return stats.NewQueryStats(s) } @@ -272,7 +274,7 @@ func NewAPI( buildInfo: buildInfo, gatherer: gatherer, isAgent: isAgent, - statsRenderer: defaultStatsRenderer, + statsRenderer: DefaultStatsRenderer, remoteReadHandler: remote.NewReadHandler(logger, registerer, q, configFunc, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame), } @@ -461,7 +463,7 @@ func (api *API) query(r *http.Request) (result apiFuncResult) { // Optional stats field in response if parameter "stats" is not empty. sr := api.statsRenderer if sr == nil { - sr = defaultStatsRenderer + sr = DefaultStatsRenderer } qs := sr(ctx, qry.Stats(), r.FormValue("stats")) @@ -563,7 +565,7 @@ func (api *API) queryRange(r *http.Request) (result apiFuncResult) { // Optional stats field in response if parameter "stats" is not empty. 
sr := api.statsRenderer if sr == nil { - sr = defaultStatsRenderer + sr = DefaultStatsRenderer } qs := sr(ctx, qry.Stats(), r.FormValue("stats")) @@ -702,7 +704,7 @@ func (api *API) labelNames(r *http.Request) apiFuncResult { names = []string{} } - if len(names) >= limit { + if len(names) > limit { names = names[:limit] warnings = warnings.Add(errors.New("results truncated due to limit")) } @@ -791,7 +793,7 @@ func (api *API) labelValues(r *http.Request) (result apiFuncResult) { slices.Sort(vals) - if len(vals) >= limit { + if len(vals) > limit { vals = vals[:limit] warnings = warnings.Add(errors.New("results truncated due to limit")) } @@ -887,7 +889,8 @@ func (api *API) series(r *http.Request) (result apiFuncResult) { } metrics = append(metrics, set.At().Labels()) - if len(metrics) >= limit { + if len(metrics) > limit { + metrics = metrics[:limit] warnings.Add(errors.New("results truncated due to limit")) return apiFuncResult{metrics, nil, warnings, closer} } diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index bb2a73f6d..b30890893 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -25,6 +25,7 @@ import ( "reflect" "runtime" "sort" + "strconv" "strings" "testing" "time" @@ -49,6 +50,7 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/storage" @@ -338,7 +340,7 @@ var sampleFlagMap = map[string]string{ } func TestEndpoints(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar"} 0+100x100 test_metric1{foo="boo"} 1+0x100 @@ -502,7 +504,7 @@ func (b byLabels) Less(i, j int) bool { return labels.Compare(b[i], b[j]) < 0 } func TestGetSeries(t *testing.T) { // TestEndpoints doesn't have enough label names to test api.labelNames // endpoint properly. Hence we test it separately. - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo1="bar", baz="abc"} 0+100x100 test_metric1{foo2="boo"} 1+0x100 @@ -606,7 +608,7 @@ func TestGetSeries(t *testing.T) { func TestQueryExemplars(t *testing.T) { start := time.Unix(0, 0) - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar"} 0+100x100 test_metric1{foo="boo"} 1+0x100 @@ -725,7 +727,7 @@ func TestQueryExemplars(t *testing.T) { func TestLabelNames(t *testing.T) { // TestEndpoints doesn't have enough label names to test api.labelNames // endpoint properly. Hence we test it separately. - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo1="bar", baz="abc"} 0+100x100 test_metric1{foo2="boo"} 1+0x100 @@ -1058,6 +1060,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E responseLen int // If nonzero, check only the length; `response` is ignored. responseMetadataTotal int responseAsJSON string + warningsCount int errType errorType sorter func(interface{}) metadata []targetMetadata @@ -1415,7 +1418,17 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E "match[]": []string{"test_metric1"}, "limit": []string{"1"}, }, - responseLen: 1, // API does not specify which particular value will come back. 
+ responseLen: 1, // API does not specify which particular value will come back. + warningsCount: 1, + }, + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{"test_metric1"}, + "limit": []string{"2"}, + }, + responseLen: 2, // API does not specify which particular value will come back. + warningsCount: 0, // No warnings if limit isn't exceeded. }, // Missing match[] query params in series requests. { @@ -2698,7 +2711,19 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E query: url.Values{ "limit": []string{"2"}, }, - responseLen: 2, // API does not specify which particular values will come back. + responseLen: 2, // API does not specify which particular values will come back. + warningsCount: 1, + }, + { + endpoint: api.labelValues, + params: map[string]string{ + "name": "__name__", + }, + query: url.Values{ + "limit": []string{"4"}, + }, + responseLen: 4, // API does not specify which particular values will come back. + warningsCount: 0, // No warnings if limit isn't exceeded. }, // Label names. { @@ -2845,7 +2870,16 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E query: url.Values{ "limit": []string{"2"}, }, - responseLen: 2, // API does not specify which particular values will come back. + responseLen: 2, // API does not specify which particular values will come back. + warningsCount: 1, + }, + { + endpoint: api.labelNames, + query: url.Values{ + "limit": []string{"3"}, + }, + responseLen: 3, // API does not specify which particular values will come back. + warningsCount: 0, // No warnings if limit isn't exceeded. }, }...) } @@ -2922,6 +2956,8 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E require.NoError(t, err) require.JSONEq(t, test.responseAsJSON, string(s)) } + + require.Len(t, res.warnings, test.warningsCount) }) } }) @@ -2937,8 +2973,10 @@ func assertAPIError(t *testing.T, got *apiError, exp errorType) { t.Helper() if exp == errorNone { + //nolint:testifylint require.Nil(t, got) } else { + //nolint:testifylint require.NotNil(t, got) require.Equal(t, exp, got.typ, "(%q)", got) } @@ -3543,7 +3581,7 @@ func TestTSDBStatus(t *testing.T) { }, } { tc := tc - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { api := &API{db: tc.db, gatherer: prometheus.DefaultGatherer} endpoint := tc.endpoint(api) req, err := http.NewRequest(tc.method, fmt.Sprintf("?%s", tc.values.Encode()), nil) @@ -3835,7 +3873,7 @@ func TestExtractQueryOpts(t *testing.T) { // Test query timeout parameter. 
func TestQueryTimeout(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar"} 0+100x100 `) diff --git a/web/federate_test.go b/web/federate_test.go index f201210ec..056a95d67 100644 --- a/web/federate_test.go +++ b/web/federate_test.go @@ -22,6 +22,7 @@ import ( "net/http" "net/http/httptest" "sort" + "strconv" "strings" "testing" "time" @@ -34,6 +35,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/textparse" "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/util/teststorage" @@ -201,7 +203,7 @@ test_metric_without_labels{instance="baz"} 1001 6000000 } func TestFederation(t *testing.T) { - storage := promql.LoadedStorage(t, ` + storage := promqltest.LoadedStorage(t, ` load 1m test_metric1{foo="bar",instance="i"} 0+100x100 test_metric1{foo="boo",instance="i"} 1+0x100 @@ -340,8 +342,8 @@ func TestFederationWithNativeHistograms(t *testing.T) { } app := db.Appender(context.Background()) for i := 0; i < 6; i++ { - l := labels.FromStrings("__name__", "test_metric", "foo", fmt.Sprintf("%d", i)) - expL := labels.FromStrings("__name__", "test_metric", "instance", "", "foo", fmt.Sprintf("%d", i)) + l := labels.FromStrings("__name__", "test_metric", "foo", strconv.Itoa(i)) + expL := labels.FromStrings("__name__", "test_metric", "instance", "", "foo", strconv.Itoa(i)) var err error switch i { case 0, 3: diff --git a/web/ui/module/codemirror-promql/src/complete/hybrid.test.ts b/web/ui/module/codemirror-promql/src/complete/hybrid.test.ts index 0f1a8b80a..7b20bfce3 100644 --- a/web/ui/module/codemirror-promql/src/complete/hybrid.test.ts +++ b/web/ui/module/codemirror-promql/src/complete/hybrid.test.ts @@ -251,6 +251,12 @@ describe('analyzeCompletion test', () => { pos: 11, // cursor is between the bracket after the string myL expectedContext: [{ kind: ContextKind.LabelName }], }, + { + title: 'continue to autocomplete QuotedLabelName in aggregate modifier', + expr: 'sum by ("myL")', + pos: 12, // cursor is between the bracket after the string myL + expectedContext: [{ kind: ContextKind.LabelName }], + }, { title: 'autocomplete labelName in a list', expr: 'sum by (myLabel1,)', @@ -263,6 +269,12 @@ describe('analyzeCompletion test', () => { pos: 23, // cursor is between the bracket after the string myLab expectedContext: [{ kind: ContextKind.LabelName }], }, + { + title: 'autocomplete labelName in a list 2', + expr: 'sum by ("myLabel1", "myLab")', + pos: 27, // cursor is between the bracket after the string myLab + expectedContext: [{ kind: ContextKind.LabelName }], + }, { title: 'autocomplete labelName associated to a metric', expr: 'metric_name{}', @@ -299,6 +311,12 @@ describe('analyzeCompletion test', () => { pos: 22, // cursor is between the bracket after the comma expectedContext: [{ kind: ContextKind.LabelName, metricName: '' }], }, + { + title: 'continue to autocomplete quoted labelName associated to a metric', + expr: '{"metric_"}', + pos: 10, // cursor is between the bracket after the string metric_ + expectedContext: [{ kind: ContextKind.MetricName, metricName: 'metric_' }], + }, { title: 'autocomplete the labelValue with metricName + labelName', expr: 'metric_name{labelName=""}', @@ -342,6 +360,30 @@ describe('analyzeCompletion test', () => { }, ], }, + { + title: 'autocomplete the labelValue with metricName 
+ quoted labelName', + expr: 'metric_name{labelName="labelValue", "labelName"!=""}', + pos: 50, // cursor is between the quotes + expectedContext: [ + { + kind: ContextKind.LabelValue, + metricName: 'metric_name', + labelName: 'labelName', + matchers: [ + { + name: 'labelName', + type: Neq, + value: '', + }, + { + name: 'labelName', + type: EqlSingle, + value: 'labelValue', + }, + ], + }, + ], + }, { title: 'autocomplete the labelValue associated to a labelName', expr: '{labelName=""}', @@ -427,6 +469,12 @@ describe('analyzeCompletion test', () => { pos: 22, // cursor is after '!' expectedContext: [{ kind: ContextKind.MatchOp }], }, + { + title: 'autocomplete matchOp 3', + expr: 'metric_name{"labelName"!}', + pos: 24, // cursor is after '!' + expectedContext: [{ kind: ContextKind.BinOp }], + }, { title: 'autocomplete duration with offset', expr: 'http_requests_total offset 5', diff --git a/web/ui/module/codemirror-promql/src/complete/hybrid.ts b/web/ui/module/codemirror-promql/src/complete/hybrid.ts index cf23aa11a..46748d5dc 100644 --- a/web/ui/module/codemirror-promql/src/complete/hybrid.ts +++ b/web/ui/module/codemirror-promql/src/complete/hybrid.ts @@ -29,7 +29,6 @@ import { GroupingLabels, Gte, Gtr, - LabelMatcher, LabelMatchers, LabelName, Lss, @@ -52,6 +51,9 @@ import { SubqueryExpr, Unless, VectorSelector, + UnquotedLabelMatcher, + QuotedLabelMatcher, + QuotedLabelName, } from '@prometheus-io/lezer-promql'; import { Completion, CompletionContext, CompletionResult } from '@codemirror/autocomplete'; import { EditorState } from '@codemirror/state'; @@ -181,7 +183,10 @@ export function computeStartCompletePosition(node: SyntaxNode, pos: number): num let start = node.from; if (node.type.id === LabelMatchers || node.type.id === GroupingLabels) { start = computeStartCompleteLabelPositionInLabelMatcherOrInGroupingLabel(node, pos); - } else if (node.type.id === FunctionCallBody || (node.type.id === StringLiteral && node.parent?.type.id === LabelMatcher)) { + } else if ( + node.type.id === FunctionCallBody || + (node.type.id === StringLiteral && (node.parent?.type.id === UnquotedLabelMatcher || node.parent?.type.id === QuotedLabelMatcher)) + ) { // When the cursor is between bracket, quote, we need to increment the starting position to avoid to consider the open bracket/ first string. 
start++; } else if ( @@ -212,7 +217,7 @@ export function analyzeCompletion(state: EditorState, node: SyntaxNode): Context result.push({ kind: ContextKind.Duration }); break; } - if (node.parent?.type.id === LabelMatcher) { + if (node.parent?.type.id === UnquotedLabelMatcher || node.parent?.type.id === QuotedLabelMatcher) { // In this case the current token is not itself a valid match op yet: // metric_name{labelName!} result.push({ kind: ContextKind.MatchOp }); @@ -380,7 +385,7 @@ export function analyzeCompletion(state: EditorState, node: SyntaxNode): Context // sum by (myL) // So we have to continue to autocomplete any kind of labelName result.push({ kind: ContextKind.LabelName }); - } else if (node.parent?.type.id === LabelMatcher) { + } else if (node.parent?.type.id === UnquotedLabelMatcher) { // In that case we are in the given situation: // metric_name{myL} or {myL} // so we have or to continue to autocomplete any kind of labelName or @@ -389,9 +394,9 @@ export function analyzeCompletion(state: EditorState, node: SyntaxNode): Context } break; case StringLiteral: - if (node.parent?.type.id === LabelMatcher) { + if (node.parent?.type.id === UnquotedLabelMatcher || node.parent?.type.id === QuotedLabelMatcher) { // In this case we are in the given situation: - // metric_name{labelName=""} + // metric_name{labelName=""} or metric_name{"labelName"=""} // So we can autocomplete the labelValue // Get the labelName. @@ -399,18 +404,34 @@ export function analyzeCompletion(state: EditorState, node: SyntaxNode): Context let labelName = ''; if (node.parent.firstChild?.type.id === LabelName) { labelName = state.sliceDoc(node.parent.firstChild.from, node.parent.firstChild.to); + } else if (node.parent.firstChild?.type.id === QuotedLabelName) { + labelName = state.sliceDoc(node.parent.firstChild.from, node.parent.firstChild.to).slice(1, -1); } // then find the metricName if it exists const metricName = getMetricNameInVectorSelector(node, state); // finally get the full matcher available const matcherNode = walkBackward(node, LabelMatchers); - const labelMatchers = buildLabelMatchers(matcherNode ? matcherNode.getChildren(LabelMatcher) : [], state); + const labelMatcherOpts = [QuotedLabelName, QuotedLabelMatcher, UnquotedLabelMatcher]; + let labelMatchers: Matcher[] = []; + for (const labelMatcherOpt of labelMatcherOpts) { + labelMatchers = labelMatchers.concat(buildLabelMatchers(matcherNode ? 
matcherNode.getChildren(labelMatcherOpt) : [], state)); + } result.push({ kind: ContextKind.LabelValue, metricName: metricName, labelName: labelName, matchers: labelMatchers, }); + } else if (node.parent?.parent?.type.id === GroupingLabels) { + // In this case we are in the given situation: + // sum by ("myL") + // So we have to continue to autocomplete any kind of labelName + result.push({ kind: ContextKind.LabelName }); + } else if (node.parent?.parent?.type.id === LabelMatchers) { + // In that case we are in the given situation: + // {""} or {"metric_"} + // since this is for the QuotedMetricName we need to continue to autocomplete for the metric names + result.push({ kind: ContextKind.MetricName, metricName: state.sliceDoc(node.from, node.to).slice(1, -1) }); } break; case NumberLiteral: diff --git a/web/ui/module/codemirror-promql/src/parser/matcher.ts b/web/ui/module/codemirror-promql/src/parser/matcher.ts index f432ffe28..99e2e3969 100644 --- a/web/ui/module/codemirror-promql/src/parser/matcher.ts +++ b/web/ui/module/codemirror-promql/src/parser/matcher.ts @@ -12,33 +12,75 @@ // limitations under the License. import { SyntaxNode } from '@lezer/common'; -import { EqlRegex, EqlSingle, LabelName, MatchOp, Neq, NeqRegex, StringLiteral } from '@prometheus-io/lezer-promql'; +import { + EqlRegex, + EqlSingle, + LabelName, + MatchOp, + Neq, + NeqRegex, + StringLiteral, + UnquotedLabelMatcher, + QuotedLabelMatcher, + QuotedLabelName, +} from '@prometheus-io/lezer-promql'; import { EditorState } from '@codemirror/state'; import { Matcher } from '../types'; function createMatcher(labelMatcher: SyntaxNode, state: EditorState): Matcher { const matcher = new Matcher(0, '', ''); const cursor = labelMatcher.cursor(); - if (!cursor.next()) { - // weird case, that would mean the labelMatcher doesn't have any child. - return matcher; - } - do { - switch (cursor.type.id) { - case LabelName: - matcher.name = state.sliceDoc(cursor.from, cursor.to); - break; - case MatchOp: - const ope = cursor.node.firstChild; - if (ope) { - matcher.type = ope.type.id; + switch (cursor.type.id) { + case QuotedLabelMatcher: + if (!cursor.next()) { + // weird case, that would mean the QuotedLabelMatcher doesn't have any child. + return matcher; + } + do { + switch (cursor.type.id) { + case QuotedLabelName: + matcher.name = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); + break; + case MatchOp: + const ope = cursor.node.firstChild; + if (ope) { + matcher.type = ope.type.id; + } + break; + case StringLiteral: + matcher.value = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); + break; } - break; - case StringLiteral: - matcher.value = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); - break; - } - } while (cursor.nextSibling()); + } while (cursor.nextSibling()); + break; + case UnquotedLabelMatcher: + if (!cursor.next()) { + // weird case, that would mean the UnquotedLabelMatcher doesn't have any child. 
+ return matcher; + } + do { + switch (cursor.type.id) { + case LabelName: + matcher.name = state.sliceDoc(cursor.from, cursor.to); + break; + case MatchOp: + const ope = cursor.node.firstChild; + if (ope) { + matcher.type = ope.type.id; + } + break; + case StringLiteral: + matcher.value = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); + break; + } + } while (cursor.nextSibling()); + break; + case QuotedLabelName: + matcher.name = '__name__'; + matcher.value = state.sliceDoc(cursor.from, cursor.to).slice(1, -1); + matcher.type = EqlSingle; + break; + } return matcher; } diff --git a/web/ui/module/codemirror-promql/src/parser/parser.test.ts b/web/ui/module/codemirror-promql/src/parser/parser.test.ts index 54b95553c..2bc7e67ff 100644 --- a/web/ui/module/codemirror-promql/src/parser/parser.test.ts +++ b/web/ui/module/codemirror-promql/src/parser/parser.test.ts @@ -204,6 +204,11 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [] as Diagnostic[], }, + { + expr: 'foo and on(test,"blub") bar', + expectedValueType: ValueType.vector, + expectedDiag: [] as Diagnostic[], + }, { expr: 'foo and on() bar', expectedValueType: ValueType.vector, @@ -214,6 +219,11 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [] as Diagnostic[], }, + { + expr: 'foo and ignoring(test,"blub") bar', + expectedValueType: ValueType.vector, + expectedDiag: [] as Diagnostic[], + }, { expr: 'foo and ignoring() bar', expectedValueType: ValueType.vector, @@ -229,6 +239,11 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [] as Diagnostic[], }, + { + expr: 'foo / on(test,blub) group_left("bar") bar', + expectedValueType: ValueType.vector, + expectedDiag: [] as Diagnostic[], + }, { expr: 'foo / ignoring(test,blub) group_left(blub) bar', expectedValueType: ValueType.vector, @@ -825,6 +840,134 @@ describe('promql operations', () => { expectedValueType: ValueType.vector, expectedDiag: [], }, + { + expr: '{"foo"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + // with metric name in the middle + expr: '{a="b","foo",c~="d"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"foo", a="bc"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"colon:in:the:middle"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"dot.in.the.middle"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"😀 in metric name"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + // quotes with escape + expr: '{"this is \"foo\" metric"}', // eslint-disable-line + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"foo","colon:in:the:middle"="val"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"foo","dot.in.the.middle"="val"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{"foo","😀 in label name"="val"}', + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + // quotes with escape + expr: '{"foo","this is \"bar\" label"="val"}', // eslint-disable-line + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: 'foo{"bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + to: 10, + }, + ], + }, + { + expr: '{"foo", __name__="bar"}', + expectedValueType: 
ValueType.vector, + expectedDiag: [ + { + from: 0, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + to: 23, + }, + ], + }, + { + expr: '{"foo", "__name__"="bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + to: 25, + }, + ], + }, + { + expr: '{"__name__"="foo", __name__="bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + to: 34, + }, + ], + }, + { + expr: '{"foo", "bar"}', + expectedValueType: ValueType.vector, + expectedDiag: [ + { + from: 0, + to: 14, + message: 'metric name must not be set twice: foo or bar', + severity: 'error', + }, + ], + }, + { + expr: `{'foo\`metric':'bar'}`, // eslint-disable-line + expectedValueType: ValueType.vector, + expectedDiag: [], + }, + { + expr: '{`foo\"metric`=`bar`}', // eslint-disable-line + expectedValueType: ValueType.vector, + expectedDiag: [], + }, ]; testCases.forEach((value) => { const state = createEditorState(value.expr); diff --git a/web/ui/module/codemirror-promql/src/parser/parser.ts b/web/ui/module/codemirror-promql/src/parser/parser.ts index 58e56185c..fba7b7b6b 100644 --- a/web/ui/module/codemirror-promql/src/parser/parser.ts +++ b/web/ui/module/codemirror-promql/src/parser/parser.ts @@ -27,7 +27,6 @@ import { Gte, Gtr, Identifier, - LabelMatcher, LabelMatchers, Lss, Lte, @@ -36,11 +35,14 @@ import { Or, ParenExpr, Quantile, + QuotedLabelMatcher, + QuotedLabelName, StepInvariantExpr, SubqueryExpr, Topk, UnaryExpr, Unless, + UnquotedLabelMatcher, VectorSelector, } from '@prometheus-io/lezer-promql'; import { containsAtLeastOneChild } from './path-finder'; @@ -282,7 +284,11 @@ export class Parser { private checkVectorSelector(node: SyntaxNode): void { const matchList = node.getChild(LabelMatchers); - const labelMatchers = buildLabelMatchers(matchList ? matchList.getChildren(LabelMatcher) : [], this.state); + const labelMatcherOpts = [QuotedLabelName, QuotedLabelMatcher, UnquotedLabelMatcher]; + let labelMatchers: Matcher[] = []; + for (const labelMatcherOpt of labelMatcherOpts) { + labelMatchers = labelMatchers.concat(buildLabelMatchers(matchList ? 
matchList.getChildren(labelMatcherOpt) : [], this.state)); + } let vectorSelectorName = ''; // VectorSelector ( Identifier ) // https://github.com/promlabs/lezer-promql/blob/71e2f9fa5ae6f5c5547d5738966cd2512e6b99a8/src/promql.grammar#L200 @@ -301,6 +307,14 @@ export class Parser { // adding the metric name as a Matcher to avoid a false positive for this kind of expression: // foo{bare=''} labelMatchers.push(new Matcher(EqlSingle, '__name__', vectorSelectorName)); + } else { + // In this case when metric name is not set outside the braces + // It is checking whether metric name is set twice like in : + // {__name__:"foo", "foo"}, {"foo", "bar"} + const labelMatchersMetricName = labelMatchers.filter((lm) => lm.name === '__name__'); + if (labelMatchersMetricName.length > 1) { + this.addDiagnostic(node, `metric name must not be set twice: ${labelMatchersMetricName[0].value} or ${labelMatchersMetricName[1].value}`); + } } // A Vector selector must contain at least one non-empty matcher to prevent diff --git a/web/ui/module/lezer-promql/src/promql.grammar b/web/ui/module/lezer-promql/src/promql.grammar index 496648317..fd4edddf2 100644 --- a/web/ui/module/lezer-promql/src/promql.grammar +++ b/web/ui/module/lezer-promql/src/promql.grammar @@ -97,7 +97,7 @@ binModifiers { } GroupingLabels { - "(" (LabelName ("," LabelName)* ","?)? ")" + "(" ((LabelName | QuotedLabelName) ("," (LabelName | QuotedLabelName))* ","?)? ")" } FunctionCall { @@ -220,7 +220,7 @@ VectorSelector { } LabelMatchers { - "{" (LabelMatcher ("," LabelMatcher)* ","?)? "}" + "{" ((UnquotedLabelMatcher | QuotedLabelMatcher | QuotedLabelName)("," (UnquotedLabelMatcher | QuotedLabelMatcher | QuotedLabelName))* ","?)? "}" } MatchOp { @@ -230,8 +230,16 @@ MatchOp { NeqRegex } -LabelMatcher { - LabelName MatchOp StringLiteral +UnquotedLabelMatcher { + LabelName MatchOp StringLiteral +} + +QuotedLabelMatcher { + QuotedLabelName MatchOp StringLiteral +} + +QuotedLabelName { + StringLiteral } StepInvariantExpr { diff --git a/web/ui/module/lezer-promql/test/expression.txt b/web/ui/module/lezer-promql/test/expression.txt index 2e2b2f40b..daba7d800 100644 --- a/web/ui/module/lezer-promql/test/expression.txt +++ b/web/ui/module/lezer-promql/test/expression.txt @@ -112,6 +112,54 @@ PromQL( ) ) +# Quoted label name in grouping labels + +sum by("job", mode) (test_metric) / on("job") group_left sum by("job")(test_metric) + +==> + +PromQL( + BinaryExpr( + AggregateExpr( + AggregateOp(Sum), + AggregateModifier( + By, + GroupingLabels( + QuotedLabelName(StringLiteral), + LabelName + ) + ), + FunctionCallBody( + VectorSelector( + Identifier + ) + ) + ), + Div, + MatchingModifierClause( + On, + GroupingLabels( + QuotedLabelName(StringLiteral) + ) + GroupLeft + ), + AggregateExpr( + AggregateOp(Sum), + AggregateModifier( + By, + GroupingLabels( + QuotedLabelName(StringLiteral) + ) + ), + FunctionCallBody( + VectorSelector( + Identifier + ) + ) + ) + ) +) + # Case insensitivity for aggregations and binop modifiers. 
SuM BY(testlabel1) (testmetric1) / IGNOring(testlabel2) AVG withOUT(testlabel3) (testmetric2) @@ -226,25 +274,25 @@ PromQL( VectorSelector( Identifier, LabelMatchers( - LabelMatcher( - LabelName, - MatchOp(EqlSingle), - StringLiteral + UnquotedLabelMatcher( + LabelName, + MatchOp(EqlSingle), + StringLiteral ), - LabelMatcher( - LabelName, - MatchOp(Neq), - StringLiteral + UnquotedLabelMatcher( + LabelName, + MatchOp(Neq), + StringLiteral ), - LabelMatcher( - LabelName, - MatchOp(EqlRegex), - StringLiteral + UnquotedLabelMatcher( + LabelName, + MatchOp(EqlRegex), + StringLiteral ), - LabelMatcher( - LabelName, - MatchOp(NeqRegex), - StringLiteral + UnquotedLabelMatcher( + LabelName, + MatchOp(NeqRegex), + StringLiteral ) ) ) @@ -571,14 +619,14 @@ PromQL(NumberLiteral) NaN{foo="bar"} ==> -PromQL(BinaryExpr(NumberLiteral,⚠,VectorSelector(LabelMatchers(LabelMatcher(LabelName,MatchOp(EqlSingle),StringLiteral))))) +PromQL(BinaryExpr(NumberLiteral,⚠,VectorSelector(LabelMatchers(UnquotedLabelMatcher(LabelName,MatchOp(EqlSingle),StringLiteral))))) # Trying to illegally use Inf as a metric name. Inf{foo="bar"} ==> -PromQL(BinaryExpr(NumberLiteral,⚠,VectorSelector(LabelMatchers(LabelMatcher(LabelName,MatchOp(EqlSingle),StringLiteral))))) +PromQL(BinaryExpr(NumberLiteral,⚠,VectorSelector(LabelMatchers(UnquotedLabelMatcher(LabelName,MatchOp(EqlSingle),StringLiteral))))) # Negative offset @@ -614,3 +662,24 @@ MetricName(Identifier) ==> PromQL(BinaryExpr(NumberLiteral,Add,BinaryExpr(VectorSelector(Identifier),Atan2,VectorSelector(Identifier)))) + +# Testing quoted metric name + +{"metric_name"} + +==> +PromQL(VectorSelector(LabelMatchers(QuotedLabelName(StringLiteral)))) + +# Testing quoted label name + +{"foo"="bar"} + +==> +PromQL(VectorSelector(LabelMatchers(QuotedLabelMatcher(QuotedLabelName(StringLiteral), MatchOp(EqlSingle), StringLiteral)))) + +# Testing quoted metric name and label name + +{"metric_name", "foo"="bar"} + +==> +PromQL(VectorSelector(LabelMatchers(QuotedLabelName(StringLiteral), QuotedLabelMatcher(QuotedLabelName(StringLiteral), MatchOp(EqlSingle), StringLiteral)))) \ No newline at end of file